twin_h2_c2_comparison_diff_twin_match <- readRDS(file=‘~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/MaTCHComparisonAnalysis/twin_h2_c2_comparison_diff_twin_match.rds’)— title: “Aetna Twins” author: “Chirag Lakhani” date: “8/9/2018” output: html_document —

Functions for Analysis

Generate Dataframes

Read Data and setup allphewas_both

Create allphewas_both_zip

Create allphewas_both_zip_fixed_effect

allphewas_binary_zipcode_fixed_effect <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twin_binary_zipcode_fixed_effect.csv", 
    " ", escape_double = FALSE, col_names = FALSE, 
    trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X61 = col_integer(),
##   X62 = col_integer(),
##   X63 = col_integer(),
##   X64 = col_integer(),
##   X65 = col_integer()
## )
## See spec(...) for full column specifications.
names(allphewas_binary_zipcode_fixed_effect) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/names_binary_zipcode_fixed_effect.rds')


allphewas_binary_zipcode_fixed_effect <- allphewas_binary_zipcode_fixed_effect %>%
  mutate(h2_liab_SE_se = ifelse(phewas_code == '984',h2_SE,h2_liab_SE_se)) %>%
  mutate(rliabss_SE_se = ifelse(phewas_code == '984',r_SS_SE,rliabss_SE_se)) %>%
  mutate(c2_liab_SE_se = ifelse(phewas_code == '984',c2_SE,c2_liab_SE_se)) %>%
    mutate(rliabos_SE_se = ifelse(phewas_code == '984',r_OS_SE,rliabos_SE_se)) 






names(allphewas_binary_zipcode_fixed_effect) <- sub("_se", "", names(allphewas_binary_zipcode_fixed_effect))



allphewas_binary_zipcode_fixed_effect <- allphewas_binary_zipcode_fixed_effect %>% inner_join(phewas, ., by='phewas_code') 
## Warning: Column `phewas_code` joining factor and character vector, coercing
## into character vector
missingDFZipcode_fixed_effect <- allphewas_binary_zipcode_fixed_effect  %>% 
  right_join(.,allphewas_binary %>% select(phewas_code), by='phewas_code') %>% 
  filter(is.na(phewas_description)) %>% select(phewas_code)



saveRDS(missingDFZipcode_fixed_effect, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/orchestraAnalysis/missingDFZipcode_fixed_effect.rds')

FDR Correction

  allphewas_both <- allphewas_both %>%
  mutate(h2_liab.zscore=h2_liab/h2_liab_SE) %>%
                  mutate(c2_liab.zscore=c2_liab/c2_liab_SE) %>%
                  mutate(h2_liab.pvalue=2*pnorm(-abs(h2_liab.zscore))) %>%
                  mutate(c2_liab.pvalue=2*pnorm(-abs(c2_liab.zscore))) %>%
                  mutate(pvalue_h2_FDR = p.adjust(.$h2_liab.pvalue, method='BY')) %>%
                  mutate(pvalue_c2_FDR = p.adjust(.$c2_liab.pvalue, method='BY')) %>%
                    mutate(pvalue_h2_bonferroni = p.adjust(.$h2_liab.pvalue, method='bonferroni')) %>%
                  mutate(pvalue_c2_bonferroni = p.adjust(.$c2_liab.pvalue, method='bonferroni')) %>%
                  mutate(observed_h2 = -log10(h2_liab.pvalue) ) %>%
                  mutate(observed_c2 = -log10(c2_liab.pvalue) ) %>%
                  mutate(rliabss_liab.zscore=rliabss/rliabss_SE) %>%
                  mutate(rliabos_liab.zscore=rliabos/rliabos_SE) %>%
                  mutate(rliabss_liab.pvalue=2*pnorm(-abs(rliabss_liab.zscore))) %>%
                  mutate(rliabos_liab.pvalue=2*pnorm(-abs(rliabos_liab.zscore))) %>%
                  mutate(observed_rliabss = -log10(rliabss_liab.pvalue) ) %>%
                  mutate(observed_rliabos = -log10(rliabos_liab.pvalue) ) %>%
    mutate(tot_e2=h2_liab+c2_liab) %>%
  mutate(tot_e2_SE=sqrt(h2_liab_SE^2+c2_liab_SE^2)) %>%
  mutate(tot_e2.zscore=tot_e2/tot_e2_SE) %>%
  mutate(tot_e2.pvalue=2*pnorm(-abs(tot_e2.zscore))) %>%
  mutate(tot_e2.pvalue_FDR = p.adjust(tot_e2.pvalue, method='BY')) %>%
  mutate(observed_tot_e2 = -log10(tot_e2.pvalue) ) %>%
    mutate(e2=1-h2_liab-c2_liab) %>%
  mutate(e2_SE=sqrt(h2_liab_SE^2 + c2_liab_SE^2)) %>%
  mutate(e2.zscore=e2/e2_SE) %>%
  mutate(e2.pvalue=2*pnorm(-abs(e2.zscore))) %>%
  mutate(e2.pvalue_FDR = p.adjust(e2.pvalue, method='BY')) %>%
  mutate(observed_e2 = -log10(e2.pvalue) ) %>%
  mutate(beta_gender.zscore =`as.factor(Gender)M`/`as.factor(Gender)M_SE`) %>%
  mutate(beta_gender.pvalue=2*pnorm(-abs(beta_gender.zscore))) %>%
  mutate(pvalue_beta_gender_FDR = p.adjust(.$beta_gender.pvalue, method='BY')) %>%
  mutate(beta_age.zscore =MemberBirthYear/MemberBirthYear_SE) %>%
  mutate(beta_age.pvalue=2*pnorm(-abs(beta_age.zscore))) %>%
  mutate(pvalue_beta_age_FDR = p.adjust(.$beta_age.pvalue, method='BY')) 




  allphewas_both_zipcode <- allphewas_both_zipcode %>%
  mutate(h2_liab.zscore=h2_liab/h2_liab_SE) %>%
                  mutate(c2_liab.zscore=c2_liab/c2_liab_SE) %>%
                  mutate(h2_liab.pvalue=2*pnorm(-abs(h2_liab.zscore))) %>%
                  mutate(c2_liab.pvalue=2*pnorm(-abs(c2_liab.zscore))) %>%
                  mutate(pvalue_h2_FDR = p.adjust(.$h2_liab.pvalue, method='BY')) %>%
                  mutate(pvalue_c2_FDR = p.adjust(.$c2_liab.pvalue, method='BY')) %>%
                  mutate(observed_h2 = -log10(h2_liab.pvalue) ) %>%
                  mutate(observed_c2 = -log10(c2_liab.pvalue) ) %>%
                  mutate(rliabss_liab.zscore=rliabss/rliabss_SE) %>%
                  mutate(rliabos_liab.zscore=rliabos/rliabos_SE) %>%
                  mutate(rliabss_liab.pvalue=2*pnorm(-abs(rliabss_liab.zscore))) %>%
                  mutate(rliabos_liab.pvalue=2*pnorm(-abs(rliabos_liab.zscore))) %>%
                  mutate(observed_rliabss = -log10(rliabss_liab.pvalue) ) %>%
                  mutate(observed_rliabos = -log10(rliabos_liab.pvalue) ) %>%
    mutate(tot_e2=h2_liab+c2_liab) %>%
  mutate(tot_e2_SE=sqrt(h2_liab_SE^2+c2_liab_SE^2)) %>%
  mutate(tot_e2.zscore=tot_e2/tot_e2_SE) %>%
  mutate(tot_e2.pvalue=2*pnorm(-abs(tot_e2.zscore))) %>%
  mutate(tot_e2.pvalue_FDR = p.adjust(tot_e2.pvalue, method='BY')) %>%
  mutate(observed_tot_e2 = -log10(tot_e2.pvalue) ) %>%
    mutate(e2=1-h2_liab-c2_liab-rliabincome-rliabtemp-rliabaqi) %>%
  mutate(e2_SE=sqrt(h2_liab_SE^2+c2_liab_SE^2 + rliabincome_SE^2 + rliabaqi_SE^2 + rliabtemp_SE^2  )) %>%
  mutate(e2.zscore=e2/e2_SE) %>%
  mutate(e2.pvalue=2*pnorm(-abs(e2.zscore))) %>%
  mutate(e2.pvalue_FDR = p.adjust(e2.pvalue, method='BY')) %>%
  mutate(observed_e2 = -log10(e2.pvalue) ) %>%
                      mutate(rliabincome.zscore=rliabincome/rliabincome_SE) %>%
                  mutate(rliabincome.pvalue=2*pnorm(-abs(rliabincome.zscore))) %>%
                  mutate(rliabincome.pvalue_FDR = p.adjust(rliabincome.pvalue, method='BY')) %>%
                  mutate(observed_rliabincome = -log10(rliabincome.pvalue) ) %>%
mutate(rliabaqi.zscore=rliabaqi/rliabaqi_SE) %>%
  mutate(rliabaqi.pvalue=2*pnorm(-abs(rliabaqi.zscore))) %>%
  mutate(rliabaqi.pvalue_FDR = p.adjust(rliabaqi.pvalue, method='BY')) %>%
  mutate(observed_rliabaqi = -log10(rliabaqi.pvalue) ) %>%
mutate(rliabtemp.zscore=rliabtemp/rliabtemp_SE) %>%
  mutate(rliabtemp.pvalue=2*pnorm(-abs(rliabtemp.zscore))) %>%
  mutate(rliabtemp.pvalue_FDR = p.adjust(rliabtemp.pvalue, method='BY')) %>%
  mutate(observed_rliabtemp = -log10(rliabtemp.pvalue) ) 

  
  rm(cnt_df, cost_df, allphewas_binary, allphewas_binary_zipcode,allphewas_quant_pheno, allphewas_quant_zipcode, phewas)

Short Names

allphewas_both <- allphewas_both %>% mutate(shortName=phewas_description) %>%
    mutate(shortName=ifelse(phewas_code =='313.1','ADHD',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='134577','LDL Cholesterol',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='706.1','Acne',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='495','Asthma',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='637','Low Birthweight',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='464','Acute Sinusitis',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='984','Lead Poisoning',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='871.1','Hand Wound',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='871.3','Foot Wound',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='800.3','Tibia/Fibula Fracture',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='726.3','Bursitis',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='696.4','Psoriasis',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='696.4','Psoriasis',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='7187','Hemoglobin',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='362.1','Retinopathy',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='313','Pervasive Development Disorder',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='313.1','ADHD',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='COST','Cost',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='CNT','Comorbids',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='984','Lead Poisoning',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='367','Low Vision',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='769','Nonallopathic Lesion',shortName)) %>%
        mutate(shortName = ifelse(phewas_code =='465','Respiratory Infections',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='870','Head Wound',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='381','Otitis Media',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='474.2','Chronic Tonsillitis',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='524','Dentofacial Anomalies',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='930','Food Allergy',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='939','Atopic/Contact Dermatitis',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='706','Sebaceous Gland',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='726','Peripheral Enthesopathies',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='859','Complication Due to Implant',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='343','Cerebral Palsy',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='316','Substance Addiction',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='555','Inflammatory Bowel Disease',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='345','Epilepsy, recurrent seizures',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='758','Chromosomal anomalies/genetic disorders',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='747','Cardiac/circulatory congenital anomalies',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='656.2','Respiratory Condition Newborn',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='656','Perinatal Condition Newborn',shortName)) 

    
    
    
    
    
    
    
    
allphewas_both_zipcode <- allphewas_both_zipcode %>% mutate(shortName=phewas_description) %>%
    mutate(shortName=ifelse(phewas_code =='313.1','ADHD',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='134577','LDL Cholesterol',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='706.1','Acne',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='495','Asthma',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='637','Low Birthweight',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='464','Acute Sinusitis',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='984','Lead Poisoning',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='871.1','Hand Wound',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='871.3','Foot Wound',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='800.3','Tibia/Fibula Fracture',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='726.3','Bursitis',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='696.4','Psoriasis',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='696.4','Psoriasis',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='7187','Hemoglobin',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='362.1','Retinopathy',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='313','Pervasive Development Disorder',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='313.1','ADHD',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='COST','Cost',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='CNT','Comorbids',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='984','Lead Poisoning',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='367','Low Vision',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='769','Nonallopathic Lesion',shortName)) %>%
        mutate(shortName = ifelse(phewas_code =='465','Respiratory Infections',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='870','Head Wound',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='381','Otitis Media',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='474.2','Chronic Tonsillitis',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='524','Dentofacial Anomalies',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='930','Food Allergy',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='939','Atopic/Contact Dermatitis',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='706','Sebaceous Gland',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='726','Peripheral Enthesopathies',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='859','Complication Due to Implant',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='345','Epilepsy/recurrent seizures',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='758','Chromosomal anomalies/genetic disorders',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='747','Cardiac/circulatory congenital anomalies',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='656.2','Respiratory Condition Newborn',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='656','Perinatal Condition Newborn',shortName)) 

Functional Domain Data frames

phewas_functional_domain_map_quant_trait <- quant_traits_subset %>% 
  inner_join(.,quant_pheno_description, by='phewas_code') %>%
  select(phewas_code, phewas_description=Description) %>% 
mutate(functional_domain = case_when(phewas_code == 'CNT' ~ 'PheWAS Comorbids',
                                     phewas_code == 'COST' ~ 'Avg. Monthly Cost',
                                     TRUE ~ 'Quantitative'))


phewas_functional_domain_map <- phewas_functional_domain_map %>% bind_rows(.,phewas_functional_domain_map_quant_trait)


phewas_functional_domain_map_all <- phewas_functional_domain_map %>% 
  select(phewas_code, phewas_description) %>% mutate(functional_domain = 'All Traits') %>% unique()

phewas_functional_domain_map <- phewas_functional_domain_map %>% bind_rows(.,phewas_functional_domain_map_all)



allphewas_both_functional_domain <- allphewas_both %>% 
  inner_join(.,phewas_functional_domain_map, by=c('phewas_code', 'phewas_description'))


allphewas_both_zipcode_functional_domain <- allphewas_both_zipcode %>% 
  inner_join(.,phewas_functional_domain_map, by=c('phewas_code', 'phewas_description'))


rm(phewas_functional_domain_map_quant_trait,phewas_functional_domain_map_all)

Analysis Overview

Number of Twin Pairs

Total Number of Phenotypes

totalPheno <- nrow(allphewas_both)

totalPheno
## [1] 561

Number of Twin Pairs by Pair Type

SE for p(mz|ss)

countTwin <- demographic_information %>% group_by(genderPairType) %>% tally() %>%  spread(.,genderPairType, n) %>%
  mutate(type = 'binary') %>% mutate(binYOB = 'All')


countTwin_birth <- demographic_information %>% 
  mutate(binYOB = cut(yearT1, breaks=c(1985,1996,2006,2016), dig.lab=4, include.lowest=TRUE, right=FALSE ) ) %>% group_by(binYOB,genderPairType) %>% tally() %>% spread(.,genderPairType, n) %>% ungroup()


countTwin_birth <- countTwin_birth %>% mutate(type='binary')


countTwin_all <- countTwin %>% bind_rows(.,countTwin_birth)
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
countTwin_all <- countTwin_all %>% mutate(SS=FF+MM) %>% mutate(OS = MF)



YearOfBirth <- readRDS('~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/quant_raw_data/revised/quant_pheno_YearOfBirthCount.rds')




count_quant_binYOB <- YearOfBirth %>% filter(phewas_code == '7187') %>% filter(cohort == 'subset') %>% mutate(binYOB = cut(yearBirth, breaks=c(1985,1996,2006,2016),include.lowest = TRUE,dig.lab=4, right=FALSE)) %>% 
  group_by(binYOB, genderConfig, cohort) %>% summarise(n=sum(numPairs)) %>%ungroup()  %>% select(-cohort) %>% spread(.,genderConfig,n) %>% mutate(type = 'quant')



count_quant_all <- YearOfBirth %>% filter(phewas_code == '7187') %>% filter(cohort == 'subset') %>% 
  group_by(genderConfig, cohort) %>% summarise(n=sum(numPairs)) %>%ungroup()  %>% select(-cohort) %>% spread(.,genderConfig,n) %>% mutate(type = 'quant') %>% mutate(binYOB = 'All')



count_everything <- countTwin_all %>% 
  bind_rows(.,count_quant_all,count_quant_binYOB) %>% 
  select(-MM,-MF,-FF) %>% mutate(n=SS+OS ) %>% 
  mutate(pss=SS/n,pmz=1-2*(OS/n)) %>% 
  mutate(p=pmz/pss) %>%
  mutate(var_p = (n*OS)/(SS^3)) %>% mutate(p_SE = sqrt(var_p)) %>%
  mutate(p_low = p-1.96*p_SE, p_high = p+1.96*p_SE) %>% select(-pss,-pmz,-var_p)
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
count_everything <- round_df(count_everything,3)

DT::datatable(count_everything)

Median Number of ICD codes

## # A tibble: 1 x 3
##   median  IQ25  IQ75
##    <dbl> <dbl> <dbl>
## 1     23    12    42
## # A tibble: 3 x 4
##   genderPairType median  IQ25  IQ75
##   <chr>           <dbl> <dbl> <dbl>
## 1 FF                 23    12    41
## 2 MF                 24    13    44
## 3 MM                 22    11    41

Median Number of Age Start year codes

## # A tibble: 1 x 3
##   median  IQ25  IQ75
##    <dbl> <dbl> <dbl>
## 1      7     3    13
## # A tibble: 3 x 4
##   genderPairType median  IQ25  IQ75
##   <chr>           <dbl> <dbl> <dbl>
## 1 FF                  8     3    14
## 2 MF                  7     2    12
## 3 MM                  8     3    13

Median Number of Month Enrollment

## # A tibble: 1 x 3
##   median  IQ25  IQ75
##    <dbl> <dbl> <dbl>
## 1     60    45    84
## # A tibble: 3 x 4
##   genderPairType median  IQ25  IQ75
##   <chr>           <dbl> <dbl> <dbl>
## 1 FF                 60    45    84
## 2 MF                 60    45    84
## 3 MM                 60    45    84

Distinct Zipcodes

## # A tibble: 1 x 1
##   Unique_Elements
##             <int>
## 1           11666
## # A tibble: 3 x 2
##   genderPairType Unique_Elements
##   <chr>                    <int>
## 1 FF                        7302
## 2 MF                        7466
## 3 MM                        7235

Prevalence Counts

prevalence_aggregate <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/prevalence_data/prevalence_aggregate.rds')


phewas_functional_domain_map_binary_no_all <- phewas_functional_domain_map %>% 
  filter(functional_domain !='Quantitative') %>%
filter(functional_domain !='Avg. Monthly Cost') %>%
filter(functional_domain !='PheWAS Comorbids') %>%
  filter(functional_domain !='All Traits') 



prev_all <- prevalence_aggregate %>% 
            group_by(phewas_code, PheWAS_Indicator) %>% 
            summarise(n=sum(countTwin)) %>%
            spread(PheWAS_Indicator, n, fill=0) %>%
            mutate(prevalence = `1` / (`0` + `1`) ) %>%
            right_join(., phewas_functional_domain_map_binary_no_all, by='phewas_code') %>%
            mutate(category = 'All')



prev_M <- prevalence_aggregate %>% 
            group_by(phewas_code, PheWAS_Indicator, Gender) %>% 
            summarise(n=sum(countTwin)) %>%
            filter(Gender == 'M') %>% select(-Gender) %>%
            spread(PheWAS_Indicator, n, fill=0) %>%
            mutate(prevalence = `1` / (`0` + `1`) ) %>%
            right_join(., phewas_functional_domain_map_binary_no_all, by='phewas_code') %>%
            mutate(category = 'Male')



prev_F <- prevalence_aggregate %>% 
            group_by(phewas_code, PheWAS_Indicator, Gender) %>% 
            summarise(n=sum(countTwin)) %>%
            filter(Gender == 'F') %>% select(-Gender) %>%
            spread(PheWAS_Indicator, n, fill=0) %>%
            mutate(prevalence = `1` / (`0` + `1`) ) %>%
            right_join(., phewas_functional_domain_map_binary_no_all, by='phewas_code') %>%
            mutate(category = 'Female')


prev <- prev_all %>% rbind(.,prev_M) %>% rbind(., prev_F) %>% filter(!is.na(functional_domain ))


rm(prev_all, prev_F, prev_M)



#functional_domain_summary_all <- prev %>% select(phewas_code,prevalence, category) %>% unique() %>% group_by(category) %>%summarise(median = median(prevalence, na.rm = TRUE), IQ25 = quantile(prevalence, .25), IQ75 = quantile(prevalence,.75), numTraits = n_distinct(phewas_code)) 


#functional_domain_summary_all$functional_domain <- 'All'

#functional_domain_summary <- prev %>% group_by(functional_domain, category) %>% summarise(median = median(prevalence, na.rm = TRUE), IQ25 = quantile(prevalence, .25), IQ75 = quantile(prevalence,.75), numTraits = n_distinct(phewas_code))

#functional_domain_summary <- functional_domain_summary %>% bind_rows(.,functional_domain_summary_all)

#DT::datatable(functional_domain_summary) %>% 
#  formatSignif('median',2) %>% 
#  formatSignif('IQ25',2) %>% 
#  formatSignif('IQ75',2)

#DT::datatable(functional_domain_summary %>% select(functional_domain) %>% unique())


DT::datatable(prev)

Boxplots of Prevalence by Functional Category

plot_fig <- ggplot(prev, aes(functional_domain, prevalence)) +
  geom_boxplot(aes(colour=category), outlier.alpha = 0.4) + 
   scale_y_log10() +
  theme(axis.text.x=element_text(angle=45, size = 8),
        axis.title.x = element_text(vjust=10)) +
 ylab('Prevalence') + xlab('Functional Domain') +
  theme(legend.position="bottom",legend.box = "horizontal")




ggsave(plot_fig, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/prevalence_functional_category.png', height=3.5, width=6)

plot_fig

Map of Twin Pairs and SES/Population Density Distribution

map_prevalence_data <- readRDS(all,file = '~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/maps/map_prevalence_data.rds')

pp <- ggplot() +
  geom_polygon(data = map_prevalence_data, 
               aes(x = long, y = lat, group = group, fill = numPairs), 
               color = "black", size = 0.25) + 
  coord_map() +
  labs(fill = "Number of Twin Pairs") + xlim(-140,-66) + theme_map() 

#pp


df_Pop <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/SES_Analysis/popDensity_twin_ACS.rds')

df_SES <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/SES_Analysis/SES_twin_ACS.rds')



popDensityPlot <- ggplot(df_Pop, aes(x=densityBin, y=normalizedPop)) + 
  geom_bar(stat='identity', aes(fill = cohort))  + 
  facet_wrap(~cohort,nrow = 1) +
  theme(axis.text.x = element_text(angle = 60, size=1), 
        axis.title.x = element_text(size=8),
        axis.title.y = element_text(size=8)) + 
  xlab("Log of Population Per Square Mile Area") + 
  ylab("% Total Population")




sesDensityPlot <- ggplot(df_SES, aes(x=SES_bin, y=normalizedPop)) + 
  geom_bar(stat='identity', aes(fill = cohort))  + 
  facet_wrap(~cohort,nrow = 1) +
  theme(axis.text.x = element_text(angle = 60, size=1), 
        axis.title.x = element_text(size=8),
        axis.title.y = element_text(size=8)) + 
  xlab("Depravity Index Score") + ylab("% Total Population") 




figure1a <- ggarrange(popDensityPlot,sesDensityPlot,labels = c( 'b)','c)'),  
                      common.legend=TRUE, ncol = 2, legend = 'bottom',
                      font.label = list(size = 10))

figure1 <- ggarrange(pp,figure1a,labels = c( "a)", ''),  common.legend=FALSE, nrow = 2, ncol = 1,legend = 'top',
                     font.label = list(size = 10))

#figure1


ggsave(figure1,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1.png')
## Saving 7 x 5 in image

Correlation between Depravity Index and Median Family Income for 500 Cities Data

PCLoadings_500Cities <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/data/PCLoadings_500Cities.rds')


cor_fit_deprav_income <- cor.test(-PCLoadings_500Cities$PC1,PCLoadings_500Cities$medianincome)


cor_fit_deprav_income
## 
##  Pearson's product-moment correlation
## 
## data:  -PCLoadings_500Cities$PC1 and PCLoadings_500Cities$medianincome
## t = 269.61, df = 26565, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8525273 0.8589647
## sample estimates:
##       cor 
## 0.8557791

Meta-Analysis

Setup

all_traits_df <- data.frame(functional_domain = 'All Traits', 
                            r_MZ = 0.636, 
                            r_MZ_SE = 0.002, 
                            r_DZ=0.339,
                            r_DZ_SE = 0.003,
                            h2 = 0.488,
                            h2_SE = 0.005,
                            c2 = 0.174,
                            c2_SE = 0.004,
                            h2_corr = 0.593,
                            h2_corr_SE = 0.008,
                            c2_corr = 0.042,
                            c2_corr_SE = 0.007,
                            r_MZ_nstudies = NA ,
                            r_MZ_ntraits = 9568,
                            r_DZ_nstudies = NA,
                            r_DZ_ntraits = 5220,
                            h2_nstudies = NA,
                            h2_ntraits = 2929,
                            c2_nstudies = NA,
                            c2_ntraits = 2771)



functional_domain_df <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/match_phewas_mapping/functional_domain/functional_domain.rds')


functional_domain_df <- functional_domain_df %>% rbind(.,all_traits_df)


functional_domain_df <-   functional_domain_df %>% 
  filter(functional_domain != 'Socialinteractions') %>%
  filter(functional_domain != 'Socialvalues') %>%
  filter(functional_domain != 'Cell') %>%
  filter(functional_domain != 'Activities') %>%
  filter(functional_domain != 'Nutritional') %>%
  filter(functional_domain != 'Mortality') %>%
filter(functional_domain != 'Muscular')

h2 meta-analysis

zz <- allphewas_both_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=h2_liab,sei=h2_liab_SE,data=., method='DL'))
## Warning in rma(yi = h2_liab, sei = h2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.

## Warning in rma(yi = h2_liab, sei = h2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b_h2 <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, h2_liab,1:ncol(.))

phewas_se_h2 <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, h2_liab_SE,1:ncol(.))

phewas_meta_h2_liab_all <- phewas_b_h2 %>% inner_join(.,phewas_se_h2, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_h2_liab_all) <- c('functional_domain','h2','h2_SE', 'category')




MaTCH_h2 <- functional_domain_df %>% select(functional_domain, h2_corr, h2_corr_SE) %>% mutate(category = 'MaTCH')

names(MaTCH_h2) <- c('functional_domain','h2','h2_SE', 'category')


metaAll_justMatch_h2 <- phewas_meta_h2_liab_all %>% rbind(.,MaTCH_h2) 


metaAll_justMatch_h2 <- metaAll_justMatch_h2 %>%
  mutate(h2_low = h2-1.96*h2_SE, h2_high = h2+1.96*h2_SE )

DT::datatable(round_df(metaAll_justMatch_h2, digits=3))
rm(zz, phewas_b_h2, phewas_se_h2, MaTCH_h2)

c2 meta-analysis

zz <- allphewas_both_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=c2_liab,sei=c2_liab_SE,data=., method='DL'))
## Warning in rma(yi = c2_liab, sei = c2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.

## Warning in rma(yi = c2_liab, sei = c2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, c2_liab,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, c2_liab_SE,1:ncol(.))

phewas_meta_c2_liab_all <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_c2_liab_all) <- c('functional_domain','c2','c2_SE', 'category')




MaTCH <- functional_domain_df %>% select(functional_domain, c2_corr, c2_corr_SE) %>% mutate(category = 'MaTCH')

names(MaTCH) <- c('functional_domain','c2','c2_SE', 'category')


metaAll_justMatch_c2 <- phewas_meta_c2_liab_all %>% rbind(.,MaTCH) 


metaAll_justMatch_c2 <- metaAll_justMatch_c2 %>%
  mutate(c2_low = c2-1.96*c2_SE, c2_high = c2+1.96*c2_SE )

DT::datatable(round_df(metaAll_justMatch_c2, digits=3))
rm(zz, phewas_b, phewas_se, MaTCH)

e2 meta-analysis

zz <- allphewas_both_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=e2,sei=e2_SE,data=., method='DL'))
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.

## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, e2,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, e2_SE,1:ncol(.))

phewas_meta_e2_liab_all <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_e2_liab_all) <- c('functional_domain','e2','e2_SE', 'category')






rm(zz, phewas_b, phewas_se)

rss meta-analysis

zz <- allphewas_both_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabss,sei=rliabss_SE,data=., method='DL'))
## Warning in rma(yi = rliabss, sei = rliabss_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabss, sei = rliabss_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabss,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabss_SE,1:ncol(.))

phewas_meta_rss_liab_all <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_rss_liab_all) <- c('functional_domain','rSS','rSS_SE', 'category')



output_rliabSS <- phewas_meta_rss_liab_all %>%
  mutate(rSS_low = rSS-1.96*rSS_SE, rSS_high = rSS+1.96*rSS_SE )

DT::datatable(round_df(output_rliabSS, digits=3))

ros meta-analysis

zz <- allphewas_both_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% filter(rliabos_SE > .00000001) %>%
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabos,sei=rliabos_SE,data=., method='DL'))


phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabos,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabos_SE,1:ncol(.))

phewas_meta_ros_liab_all <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_ros_liab_all) <- c('functional_domain','rOS','rOS_SE', 'category')


output_rliabOS <- phewas_meta_ros_liab_all %>%
  mutate(rOS_low = rOS-1.96*rOS_SE, rOS_high = rOS+1.96*rOS_SE )

DT::datatable(round_df(output_rliabOS, digits=3))

h2/c2 MaTCH Plot

metaAll_justMatch_h2$functional_domain <- factor(metaAll_justMatch_h2$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal',
                                                            'Quantitative',
                                                            'Avg. Monthly Cost',
                                                            'PheWAS Comorbids'))



metaAll_justMatch_c2$functional_domain <- factor(metaAll_justMatch_c2$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal',
                                                            'Quantitative',
                                                            'Avg. Monthly Cost',
                                                            'PheWAS Comorbids'))






h2_liab_ggplot <- ggplot(metaAll_justMatch_h2, aes(y=h2, x=functional_domain, colour=category)) + 
  geom_point(position = position_dodge(width = .5)) +  
  geom_errorbar(aes(ymin=h2-1.96*h2_SE, ymax=h2+1.96*h2_SE),position = position_dodge(width = 0.5)) + 
  coord_flip() +  
  theme(axis.text=element_text(size=8))  + 
  scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
  theme(axis.title = element_text()) + ylab('Heritability (h2)') + xlab('Functional Domain')


ggsave(h2_liab_ggplot,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/h2_liab_comparison_match.png', height=3.5, width=6)
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
c2_liab_ggplot <- ggplot(metaAll_justMatch_c2, aes(y=c2, x=functional_domain, colour=category)) + 
  geom_point(position = position_dodge(width = .5)) +  
  geom_errorbar(aes(ymin=c2-1.96*c2_SE, ymax=c2+1.96*c2_SE),position = position_dodge(width = 0.5)) + 
  coord_flip() +     
  theme(axis.text=element_text(size=8))  + 
  scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
  theme(axis.title = element_text()) + ylab('Shared Environmental Variance (c2)') + xlab('Functional Domain')



ggsave(c2_liab_ggplot,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/c2_liab_comparison_match.png', height=3.5, width=6)
## Warning: Removed 3 rows containing missing values (geom_point).

## Warning: Removed 3 rows containing missing values (geom_errorbar).
h2_c2_MaTCH <- ggarrange(h2_liab_ggplot,c2_liab_ggplot,labels = c( "a)", 'b)'), nrow = 2, ncol = 1, common.legend=TRUE,legend='bottom' )
## Warning: Removed 3 rows containing missing values (geom_point).

## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
#h2_c2_MaTCH

ggsave(h2_c2_MaTCH,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/h2_c2_MaTCH.png', height = 10, width=8)

Barplot by functional group

phewas_meta_h2_liab_all$stat <- 'h2'
phewas_meta_c2_liab_all$stat <- 'c2'
#phewas_meta_e2_liab_all$stat <- 'e2'
phewas_meta_rss_liab_all$stat <- 'rOS'
phewas_meta_ros_liab_all$stat <- 'rSS'

a <- phewas_meta_h2_liab_all %>% select(functional_domain, statistic = h2, stat)
b <- phewas_meta_c2_liab_all %>% select(functional_domain, statistic = c2, stat)
#c <- phewas_meta_e2_liab_all %>% select(functional_domain, statistic = e2, stat)
d <- phewas_meta_ros_liab_all %>% select(functional_domain, statistic = rOS, stat)
e <- phewas_meta_rss_liab_all %>% select(functional_domain, statistic = rSS, stat)

z_meta_all <- a %>% bind_rows(.,b,d,e)

rm(a,b,d,e)

z_meta_all$stat <- factor(z_meta_all$stat, levels=c('rOS','rSS', 'h2','c2'))


z_meta_all$functional_domain <- factor(z_meta_all$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal',
                                                            'Quantitative',
                                                            'Avg. Monthly Cost',
                                                            'PheWAS Comorbids'))



barchart_fn_domain <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) + 
  geom_bar(stat="identity", position="dodge", width = 0.75, colour="black") +
  theme(axis.text.x=element_text(angle=45), text=element_text(size=8) ) +
    theme(axis.title = element_text()) +
ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic') 

barchart_fn_domain

ggsave(barchart_fn_domain, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/barchart_fn_domain.png')
## Saving 7 x 5 in image

Barplot with environmental information

meta-analysis h2 with environmental variables

zz <- allphewas_both_zipcode_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=h2_liab,sei=h2_liab_SE,data=., method='DL'))


phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, h2_liab,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, h2_liab_SE,1:ncol(.))

environment_meta_h2_liab_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(environment_meta_h2_liab_all_environment) <- c('functional_domain','h2','h2_SE', 'category')


environment_meta_h2_liab_all_environment <- environment_meta_h2_liab_all_environment %>% 
  mutate(h2_low=h2-1.96*h2_SE,h2_high=h2+1.96*h2_SE )


DT::datatable(round_df(environment_meta_h2_liab_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)

meta-analysis c2 with environmental variables

zz <- allphewas_both_zipcode_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=c2_liab,sei=c2_liab_SE,data=., method='DL'))


phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, c2_liab,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, c2_liab_SE,1:ncol(.))

environment_meta_c2_liab_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(environment_meta_c2_liab_all_environment) <- c('functional_domain','c2','c2_SE', 'category')

environment_meta_c2_liab_all_environment <- environment_meta_c2_liab_all_environment %>% mutate(c2_low=c2-1.96*c2_SE,c2_high=c2+1.96*c2_SE )

DT::datatable(round_df(environment_meta_c2_liab_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)

meta-analysis e2 with environmental variables

zz <- allphewas_both_zipcode_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=e2,sei=e2_SE,data=., method='DL'))
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.

## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.

## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.

## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.

## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, e2,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, e2_SE,1:ncol(.))

environment_meta_e2_liab_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(environment_meta_e2_liab_all_environment) <- c('functional_domain','e2','e2_SE', 'category')


environment_meta_e2_liab_all_environment <- environment_meta_e2_liab_all_environment %>% 
  mutate(e2_low=e2-1.96*e2_SE,e2_high=e2+1.96*e2_SE )


DT::datatable(round_df(environment_meta_e2_liab_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)

meta-analysis rliabincome with environmental variables

zz <- allphewas_both_zipcode_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabincome,sei=rliabincome_SE,data=., method='DL'))
## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabincome,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabincome_SE,1:ncol(.))

environment_meta_rliabincome_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(environment_meta_rliabincome_all_environment) <- c('functional_domain','var_income','var_income_SE', 'category')



environment_meta_rliabincome_all_environment <- environment_meta_rliabincome_all_environment %>% 
  mutate(var_income_low=var_income-1.96*var_income_SE,var_income_high=var_income+1.96*var_income_SE )


DT::datatable(round_df(environment_meta_rliabincome_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)

meta-analysis rliabaqi with environmental variables

zz <- allphewas_both_zipcode_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabaqi,sei=rliabaqi_SE,data=., method='DL'))


phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabaqi,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabaqi_SE,1:ncol(.))

environment_meta_rliabaqi_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(environment_meta_rliabaqi_all_environment) <- c('functional_domain','var_aqi','var_aqi_SE', 'category')



environment_meta_rliabaqi_all_environment <- environment_meta_rliabaqi_all_environment %>% 
  mutate(var_aqi_low=var_aqi-1.96*var_aqi_SE, var_aqi_high=var_aqi+1.96*var_aqi_SE )


DT::datatable(round_df(environment_meta_rliabaqi_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)

meta-analysis rliabtemp with environmental variables

zz <- allphewas_both_zipcode_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabtemp,sei=rliabtemp_SE,data=., method='DL'))
## Warning in rma(yi = rliabtemp, sei = rliabtemp_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabtemp, sei = rliabtemp_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabtemp,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabtemp_SE,1:ncol(.))

environment_meta_rliabtemp_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% 
  mutate(category = 'Insurance Claims')

names(environment_meta_rliabtemp_all_environment) <- c('functional_domain','var_temp','var_temp_SE', 'category')


environment_meta_rliabtemp_all_environment <- environment_meta_rliabtemp_all_environment %>% 
  mutate(var_temp_low=var_temp-1.96*var_temp_SE, var_temp_high=var_temp+1.96*var_temp_SE )


DT::datatable(round_df(environment_meta_rliabtemp_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)
DT::datatable(allphewas_both_zipcode %>% filter(rliabincome.pvalue_FDR <= 0.05) %>% summarise(n=n())   )
DT::datatable(allphewas_both_zipcode %>% filter(rliabaqi.pvalue_FDR <= 0.05) %>% summarise(n=n())   )
DT::datatable(allphewas_both_zipcode %>% filter(rliabtemp.pvalue_FDR <= 0.05) %>% summarise(n=n())   )
var_environment_confidence_interval <- allphewas_both_zipcode %>% 
  select(phewas_code, phewas_description, rliabincome, rliabincome_SE, rliabaqi, rliabaqi_SE, rliabtemp, rliabtemp_SE) %>%
  mutate(rliabincome_low=rliabincome-1.96*rliabincome_SE, rliabincome_high=rliabincome+1.96*rliabincome_SE) %>%
  mutate(rliabaqi_low=rliabaqi-1.96*rliabaqi_SE, rliabaqi_high=rliabaqi+1.96*rliabaqi_SE) %>%
  mutate(rliabtemp_low=rliabtemp-1.96*rliabtemp_SE, rliabtemp_high=rliabtemp+1.96*rliabtemp_SE) %>%
  select(phewas_code, phewas_description, rliabincome,rliabincome_low,rliabincome_high,rliabaqi,rliabaqi_low,rliabaqi_high,rliabtemp,rliabtemp_low,rliabtemp_high)


DT::datatable(round_df(var_environment_confidence_interval,digits = 3))

Barplot by functional group - environment

environment_meta_h2_liab_all_environment$stat <- 'h2'
environment_meta_c2_liab_all_environment$stat <- 'c2'
#environment_meta_e2_liab_all_environment$stat <- 'e2'
environment_meta_rliabincome_all_environment$stat <- 'SES'
environment_meta_rliabaqi_all_environment$stat <- 'AQI'
environment_meta_rliabtemp_all_environment$stat <- 'temperature'


a <- environment_meta_h2_liab_all_environment %>% select(functional_domain, statistic = h2, stat)
b <- environment_meta_c2_liab_all_environment %>% select(functional_domain, statistic = c2, stat)
#c <- environment_meta_e2_liab_all_environment %>% select(functional_domain, statistic = e2, stat)
d <- environment_meta_rliabincome_all_environment %>% select(functional_domain, statistic = var_income, stat)
e <- environment_meta_rliabaqi_all_environment %>% select(functional_domain, statistic = var_aqi, stat)
f <- environment_meta_rliabtemp_all_environment %>% select(functional_domain, statistic = var_temp, stat)


z_meta_all <- a %>% bind_rows(.,b,d,e,f)

rm(a,b,d,e,f)

z_meta_all$stat <- factor(z_meta_all$stat, levels=c('h2','c2','SES','AQI','temperature'))


z_meta_all$functional_domain <- factor(z_meta_all$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal',
                                                            'Quantitative',
                                                            'Avg. Monthly Cost',
                                                            'PheWAS Comorbids'))



barchart_fn_domain_environment <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) + 
  geom_bar(stat="identity", position="dodge", width = 0.75, colour="black") +
  theme(axis.text.x=element_text(angle=45, size = 8),
        axis.title.x = element_text(vjust=10)) +
ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic')  +
  theme(legend.position="bottom")


barchart_fn_domain_environment

ggsave(barchart_fn_domain_environment, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/barchart_fn_domain_environment.png')
## Saving 7 x 5 in image
braden_df_meta_analytic_estimates_environment <- z_meta_all

Barplot by functional group - just environment

environment_meta_rliabincome_all_environment$stat <- 'SES'
environment_meta_rliabaqi_all_environment$stat <- 'AQI'
environment_meta_rliabtemp_all_environment$stat <- 'temperature'


d <- environment_meta_rliabincome_all_environment %>% select(functional_domain, statistic = var_income, stat)
e <- environment_meta_rliabaqi_all_environment %>% select(functional_domain, statistic = var_aqi, stat)
f <- environment_meta_rliabtemp_all_environment %>% select(functional_domain, statistic = var_temp, stat)


z_meta_all <- d %>% bind_rows(.,e,f)

rm(d,e,f)

z_meta_all$stat <- factor(z_meta_all$stat, levels=c('SES','AQI','temperature'))


z_meta_all$functional_domain <- factor(z_meta_all$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal',
                                                            'Quantitative',
                                                            'Avg. Monthly Cost',
                                                            'PheWAS Comorbids'))



barchart_fn_domain_just_environment <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) + 
  geom_bar(stat="identity", position="dodge", width = 0.75, colour="black") +
  theme(axis.text.x=element_text(angle=45, size = 8),
        axis.title.x = element_text(vjust=10)) +
ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic') +
  theme(legend.position="bottom")

barchart_fn_domain_just_environment

ggsave(barchart_fn_domain_just_environment, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/barchart_fn_domain_just_environment.png')
## Saving 7 x 5 in image

Number of Phenotypes in each Category with FDR and h2/c2 and number of additive traits

h2

h2_meta <- metaAll_justMatch_h2 %>% filter(category == 'Insurance Claims') %>%
  mutate(h2_low = h2 - 1.96*h2_SE, h2_high= h2+ 1.96*h2_SE) %>% ungroup()


h2_fdr <- allphewas_both_functional_domain %>% group_by(functional_domain) %>% summarise(count_n=n(), fdr_threshold=sum(pvalue_h2_FDR <= 0.05)) %>% mutate(pct_FDR = (fdr_threshold/count_n)*100.0) %>% ungroup()


h2_meta_fdr <- h2_meta %>% inner_join(.,h2_fdr, by='functional_domain')
## Warning: Column `functional_domain` joining factor and character vector,
## coercing into character vector
h2_meta_fdr <- round_df(h2_meta_fdr, digits=3)

DT::datatable(h2_meta_fdr)

c2

c2_meta <- metaAll_justMatch_c2 %>% filter(category == 'Insurance Claims') %>%
  mutate(c2_low = c2 - 1.96*c2_SE, c2_high= c2+ 1.96*c2_SE) %>% ungroup()


c2_fdr <- allphewas_both_functional_domain %>% group_by(functional_domain) %>% summarise(count_n=n(), fdr_threshold=sum(pvalue_c2_FDR <= 0.05)) %>% mutate(pct_FDR = (fdr_threshold/count_n)*100.0) %>% ungroup()


c2_meta_fdr <- c2_meta %>% inner_join(.,c2_fdr, by='functional_domain')
## Warning: Column `functional_domain` joining factor and character vector,
## coercing into character vector
c2_meta_fdr <- round_df(c2_meta_fdr, digits=3)

DT::datatable(c2_meta_fdr)

e2

e2_meta <- phewas_meta_e2_liab_all %>% filter(category == 'Insurance Claims') %>%
  mutate(e2_low = e2 - 1.96*e2_SE, e2_high= e2+ 1.96*e2_SE) %>% ungroup()


e2_fdr <- allphewas_both_functional_domain %>% group_by(functional_domain) %>% summarise(count_n=n(), fdr_threshold=sum(e2.pvalue_FDR <= 0.05)) %>% mutate(pct_FDR = (fdr_threshold/count_n)*100.0) %>% ungroup()


e2_meta_fdr <- e2_meta %>% inner_join(.,e2_fdr, by='functional_domain')


e2_meta_fdr <- round_df(e2_meta_fdr, digits=3)

DT::datatable(e2_meta_fdr)

SES variance component

#environment_meta_rliabincome_all_environment

#e2_meta <- environment_meta_rliabincome_all_environment %>% filter(category == 'Insurance Claims') %>%
#  mutate(var_income_low = var_income - 1.96*e2_SE, e2_high= e2+ 1.96*e2_SE) %>% ungroup()


SES_fdr <- allphewas_both_zipcode_functional_domain %>% 
  filter(!is.na(rliabincome.pvalue_FDR)) %>%
  group_by(functional_domain) %>% 
  summarise(count_n=n(), 
            fdr_threshold=sum(rliabincome.pvalue_FDR <= 0.05))  %>% ungroup() %>%
  mutate(pctThreshold=(fdr_threshold/561)*100)


SES_fdr <- environment_meta_rliabincome_all_environment %>% full_join(.,SES_fdr, by='functional_domain')


SES_fdr <- round_df(SES_fdr, digits=3)

DT::datatable(SES_fdr)

AQI variance component

#environment_meta_rliabincome_all_environment

#e2_meta <- environment_meta_rliabincome_all_environment %>% filter(category == 'Insurance Claims') %>%
#  mutate(var_income_low = var_income - 1.96*e2_SE, e2_high= e2+ 1.96*e2_SE) %>% ungroup()


AQI_fdr <- allphewas_both_zipcode_functional_domain %>% 
  filter(!is.na(rliabaqi.pvalue_FDR)) %>%
  group_by(functional_domain) %>% 
  summarise(count_n=n(), 
            fdr_threshold=sum(rliabaqi.pvalue_FDR <= 0.05))  %>% ungroup() %>%
  mutate(pctThreshold=(fdr_threshold/561)*100)


AQI_fdr <- environment_meta_rliabaqi_all_environment %>% full_join(.,AQI_fdr, by='functional_domain')


AQI_fdr <- round_df(AQI_fdr, digits=3)

DT::datatable(AQI_fdr)

Temperature variance component

#environment_meta_rliabincome_all_environment

#e2_meta <- environment_meta_rliabincome_all_environment %>% filter(category == 'Insurance Claims') %>%
#  mutate(var_income_low = var_income - 1.96*e2_SE, e2_high= e2+ 1.96*e2_SE) %>% ungroup()


temp_fdr <- allphewas_both_zipcode_functional_domain %>% 
  filter(!is.na(rliabtemp.pvalue_FDR)) %>%
  group_by(functional_domain) %>% 
  summarise(count_n=n(), 
            fdr_threshold=sum(rliabtemp.pvalue_FDR <= 0.05))  %>% ungroup() %>%
  mutate(pctThreshold=(fdr_threshold/561)*100)


temp_fdr <- environment_meta_rliabtemp_all_environment %>% full_join(.,temp_fdr, by='functional_domain')


temp_fdr <- round_df(temp_fdr, digits=3)

DT::datatable(temp_fdr)

Comparison of old var comp environment estimates with new var comp environment estimates

phewas <- read.table("~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/phewasAllDescription.csv", header=TRUE)


allphewas_binary_zipcode_old <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twin_binary_zipcode_old.csv", 
    " ", escape_double = FALSE, col_names = FALSE, 
    trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X75 = col_integer(),
##   X76 = col_integer(),
##   X77 = col_integer(),
##   X78 = col_integer(),
##   X79 = col_integer()
## )
## See spec(...) for full column specifications.
names(allphewas_binary_zipcode_old) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/names_binary_zipcode_old.rds')


allphewas_binary_zipcode_old <- allphewas_binary_zipcode_old %>%
  mutate(h2_liab_SE_se = ifelse(phewas_code == '984',h2_SE,h2_liab_SE_se)) %>%
  mutate(rliabss_SE_se = ifelse(phewas_code == '984',r_SS_SE,rliabss_SE_se)) %>%
  mutate(c2_liab_SE_se = ifelse(phewas_code == '984',c2_SE,c2_liab_SE_se)) %>%
    mutate(rliabos_SE_se = ifelse(phewas_code == '984',r_OS_SE,rliabos_SE_se)) 






names(allphewas_binary_zipcode_old) <- sub("_se", "", names(allphewas_binary_zipcode_old))



allphewas_binary_zipcode_old <- allphewas_binary_zipcode_old %>% inner_join(phewas, ., by='phewas_code') 
## Warning: Column `phewas_code` joining factor and character vector, coercing
## into character vector
comparison_var_comp_old <- allphewas_binary_zipcode_old %>%
  select(phewas_code, phewas_description,
         rliabincome_old =rliabincome, rliabincome_SE_old=rliabincome_SE, 
         rliabaqi_old=rliabaqi, rliabaqi_SE_old=rliabaqi_SE, 
         rliabtemp_old=rliabtemp, rliabtemp_SE_old=rliabtemp_SE)


comparison_var_comp_new <- allphewas_both_zipcode %>%
  select(phewas_code, 
         rliabincome, rliabincome_SE, 
         rliabaqi, rliabaqi_SE, 
         rliabtemp, rliabtemp_SE)


comparison_var_comp_both <- comparison_var_comp_old %>%
  inner_join(.,comparison_var_comp_new, by='phewas_code') %>%
  mutate(diff_income = rliabincome - rliabincome_old, diff_income_SE = sqrt(rliabincome_SE^2+rliabincome_SE_old^2)) %>%
  mutate(diff_aqi = rliabaqi - rliabaqi_old, diff_aqi_SE = sqrt(rliabaqi_SE^2+rliabaqi_SE_old^2)) %>%
mutate(diff_temp = rliabtemp - rliabtemp_old, diff_temp_SE = sqrt(rliabtemp_SE^2+rliabtemp_SE_old^2)) %>%
mutate(diff_income.zscore=diff_income/diff_income_SE) %>%
  mutate(diff_income.pvalue=2*pnorm(-abs(diff_income.zscore))) %>%
  mutate(diff_income.pvalue_FDR = p.adjust(diff_income.pvalue, method='BY')) %>%
mutate(diff_aqi.zscore=diff_aqi/diff_aqi_SE) %>%
  mutate(diff_aqi.pvalue=2*pnorm(-abs(diff_aqi.zscore))) %>%
  mutate(diff_aqi.pvalue_FDR = p.adjust(diff_aqi.pvalue, method='BY')) %>%
mutate(diff_temp.zscore=diff_temp/diff_temp_SE) %>%
  mutate(diff_temp.pvalue=2*pnorm(-abs(diff_temp.zscore))) %>%
  mutate(diff_temp.pvalue_FDR = p.adjust(diff_temp.pvalue, method='BY'))
  




ggplot(comparison_var_comp_both , aes(rliabincome_old,rliabincome)) + geom_point()  + 
  geom_abline(slope = 1, intercept = 0) 

ggplot(comparison_var_comp_both , aes(rliabaqi_old,rliabaqi)) + geom_point()  + 
  geom_abline(slope = 1, intercept = 0) 

ggplot(comparison_var_comp_both , aes(rliabtemp_old,rliabtemp)) + geom_point()  + 
  geom_abline(slope = 1, intercept = 0) 

## Income FDR and pi_0

qval_income <- qvalue(comparison_var_comp_both$diff_income.pvalue)

summary(qval_income)
## 
## Call:
## qvalue(p = comparison_var_comp_both$diff_income.pvalue)
## 
## pi0: 1   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1  <1
## p-value        4     12    23     31    40   59 500
## q-value        1      1     4      8    13   18  89
## local FDR      1      1     2      4     5    9  33
comparison_var_comp_both %>% filter(diff_income.pvalue_FDR <= 0.05) %>% tally()
##   n
## 1 3
comparison_var_comp_both %>% filter(diff_income.pvalue_FDR <= 0.05) %>% select(phewas_description)
##                           phewas_description
## 1 Otitis media and Eustachian tube disorders
## 2                               Otitis media
## 3                            Acute sinusitis
comparison_var_comp_both %>% filter(diff_income < 0) %>% tally()
##     n
## 1 260
comparison_var_comp_both %>% filter(diff_income > 0) %>% tally()
##     n
## 1 243
comparison_var_comp_both %>% filter(diff_income == 0) %>% tally()
##    n
## 1 49
income_rank_var <- comparison_var_comp_both %>% 
  select(phewas_code, phewas_description, rliabincome_old, rliabincome, rliabincome_SE, rliabincome_SE_old) %>%
  mutate(oldRank=rank(-rliabincome_old), newRank=rank(-rliabincome)) %>%
  mutate(rliabincome_old_low=rliabincome_old-1.96*rliabincome_SE_old,
         rliabincome_old_high=rliabincome_old+1.96*rliabincome_SE_old,
         rliabincome_low=rliabincome-1.96*rliabincome_SE,
         rliabincome_high=rliabincome+1.96*rliabincome_SE)
  

DT::datatable(round_df(income_rank_var, digits = 3))

AQI FDR and pi_0

qval_AQI <- qvalue(comparison_var_comp_both$diff_aqi.pvalue)

summary(qval_AQI)
## 
## Call:
## qvalue(p = comparison_var_comp_both$diff_aqi.pvalue)
## 
## pi0: 1   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1  <1
## p-value        5      9    15     23    25   44 479
## q-value        1      2     6      9     9   14  25
## local FDR      0      1     2      3     8    8  15
comparison_var_comp_both %>% filter(diff_aqi.pvalue_FDR <= 0.05) %>% tally()
##   n
## 1 2
comparison_var_comp_both %>% filter(diff_aqi.pvalue_FDR <= 0.05) %>% select(phewas_description)
##   phewas_description
## 1    Acute sinusitis
## 2  Allergic rhinitis
comparison_var_comp_both %>% filter(diff_aqi < 0) %>% tally()
##     n
## 1 205
comparison_var_comp_both %>% filter(diff_aqi > 0) %>% tally()
##     n
## 1 274
comparison_var_comp_both %>% filter(diff_aqi == 0) %>% tally()
##    n
## 1 73
aqi_rank_var <- comparison_var_comp_both %>% 
  select(phewas_code, phewas_description, rliabaqi_old, rliabaqi, rliabaqi_SE, rliabaqi_SE_old) %>%
  mutate(oldRank=rank(-rliabaqi_old), newRank=rank(-rliabaqi)) %>%
  mutate(rliabaqi_old_low=rliabaqi_old-1.96*rliabaqi_SE_old,
         rliabaqi_old_high=rliabaqi_old+1.96*rliabaqi_SE_old,
         rliabaqi_low=rliabaqi-1.96*rliabaqi_SE,
         rliabaqi_high=rliabaqi+1.96*rliabaqi_SE)

DT::datatable(round_df(aqi_rank_var, digits = 3))

Temp FDR and pi_0

qval_temp <- qvalue(comparison_var_comp_both$diff_temp.pvalue)

summary(qval_temp)
## 
## Call:
## qvalue(p = comparison_var_comp_both$diff_temp.pvalue)
## 
## pi0: 1   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1  <1
## p-value        0      0     2      6     7   14 496
## q-value        0      0     0      0     0    0   0
## local FDR      0      0     0      0     0    0   0
comparison_var_comp_both %>% filter(diff_temp.pvalue_FDR <= 0.05) %>% tally()
##   n
## 1 0
comparison_var_comp_both %>% filter(diff_temp < 0) %>% tally()
##     n
## 1 257
comparison_var_comp_both %>% filter(diff_temp > 0) %>% tally()
##     n
## 1 240
comparison_var_comp_both %>% filter(diff_temp == 0) %>% tally()
##    n
## 1 55
temp_rank_var <- comparison_var_comp_both %>% 
  select(phewas_code, phewas_description, rliabtemp_old, rliabtemp, rliabtemp_SE, rliabtemp_SE_old) %>%
  mutate(oldRank=rank(-rliabtemp_old), newRank=rank(-rliabtemp)) %>%
  mutate(rliabtemp_old_low=rliabtemp_old-1.96*rliabtemp_SE_old,
         rliabtemp_old_high=rliabtemp_old+1.96*rliabtemp_SE_old,
         rliabtemp_low=rliabtemp-1.96*rliabtemp_SE,
         rliabtemp_high=rliabtemp+1.96*rliabtemp_SE)

DT::datatable(round_df(temp_rank_var, digits = 3))

Number of Additive Traits

############### Subfunction ############################
## Compute the estimate of pi0 for a fixed value of B ##
########################################################
FixedB <- function(p, B)
{
 ## Input:
 #p: a vector of p-values
 #B: an integer, the interval [0,1] is divided into B equal-length intervals
 
 ## Output:
 #pi0: an estimate of the proportion of true null hypotheses
 
 m <- length(p) 
 t <- seq(0,1,length=B+1)   # equally spaced points in the interval [0,1]
    
 NB <- rep(0,B)         # number of p-values greater than t_i   
 NBaverage <- rep(0,B)      # average number of p-values in each of the (B-i+1) small intervals on [t_i,1]
 NS <- rep(0,B)             # number of p-values in the interval [t_i, t_(i+1)]
 pi <- rep(0,B)         # estimates of pi0  
 for(i in 1:B)
 {
  NB[i] <- length(p[p>=t[i]])
  NBaverage[i] <- NB[i]/(B-(i-1))
  NS[i] <- length(p[p>=t[i]]) - length(p[p>=t[i+1]])
  pi[i] <- NB[i]/(1-t[i])/m
 }

 i <- min(which(NS <= NBaverage))  # Find change point i
 pi0 <- min(1, mean(pi[(i-1):B]))          # average estiamte of pi0
 return(pi0)
}


############## main function ###########################
## (1) Compute the estimate of pi0                    ##
## (2) Apply the adaptive FDR controlling procedure   ##
########################################################
AverageEstimate <- function(p=NULL, Bvector=c(5, 10, 20, 50, 100), alpha=0.05)
{
 ## Input:
 #p: a vector of p-values
 #Bvector: a vector of integer values where the interval [0,1] is divided into B equal-length intervals
 #         When Bvector is an integer, number of intervals is consider as fixed. For example Bvector = 10;
 #         When Bvector is a vector, bootstrap method is used to choose an optimal value of B
 #alpha: FDR signficance level so that the FDR is controlled below alpha
 #Numboot: number of bootstrap samples

 ## Output:
 #pi0: an estimate of the proportion of true null hypotheses
 #significant: a vector of indicator variables; 
 #             it is 1 if the corresponding p-value is significant
 #         it is 0 if the corresponding p-value is not significant
    
 # check if the p-values are valid
 if (min(p)<0 || max(p)>1) print("Error: p-values are not in the interval of [0,1]")
 m <- length(p)         # Total number p-values
 
 Bvector <- as.integer(Bvector)  # Make sure Bvector is a vector of integers
 
 #Bvector has to be bigger than 1
 if(min(Bvector) <=1) print ("Error: B has to be bigger than 1")
 
 ######## Estimate pi0 ########
 if(length(Bvector) == 1)        # fixed number of numbers, i.e., B is fixed
 {
   pi0 <- AverageEstimateFixedB(p, Bvector)
 }
 else
 {
   Numboot <- 100
   OrigPi0Est <- rep(0, length(Bvector))
   for (Bloop in 1:length(Bvector))
   {
    OrigPi0Est[Bloop] <- FixedB(p, Bvector[Bloop])
   }
 
   BootResult <- matrix(0, nrow=Numboot, ncol=length(Bvector)) # Contains the bootstrap results
 
   for(k in 1:Numboot)
   {
     p.boot <- sample(p, m, replace=TRUE)    # bootstrap sample 
     for (Bloop in 1:length(Bvector))
     {
      BootResult[k,Bloop] <- FixedB(p.boot, Bvector[Bloop])
     }  
    }

   MeanPi0Est <- mean(OrigPi0Est)             # avearge of pi0 estimates over the range of Bvector
   MSEestimate <- rep(0, length(Bvector))     # compute mean-squared error
   for (i in 1:length(Bvector))
   {
     MSEestimate[i] <- (OrigPi0Est[i]- MeanPi0Est)^2
     for (k in 1:Numboot)
    {
      MSEestimate[i] <- MSEestimate[i]+1/Numboot*(BootResult[k,i] - OrigPi0Est[i])^2    
    }
   }
   pi0 <- OrigPi0Est[MSEestimate==min(MSEestimate)]
 }  # end of else           
 
 
 ######## Apply the adaptive FDR controlling procedure ########
 
 sorted.p <- sort(p)        # sorted p-values
 order.p <- order(p)        # order of the p-values
 m0 <- pi0*m                    # estimate of the number of true null       
 i <- m

 crit <- i/m0*alpha

 while (sorted.p[i] > crit )
  { 
    i <- i-1
    crit <- i/m0*alpha
    if (i==1) break
   }
  K <- i
  if (K==1 & sorted.p[K] <= 1/m0*alpha) K <- 1
  if (K==1 & sorted.p[K] > 1/m0*alpha)  K <- 0

  significant <- rep(0,m)       # indicator of significance of the p-values
  if (K > 0) significant[order.p[1:K]] <- 1

  result <- list(pi0=pi0, significant=significant)
  result
}
DT::datatable(allphewas_both %>% filter(c2_liab.pvalue <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(c2_liab.pvalue > 0.05) %>% tally())
m_sig <- allphewas_both_functional_domain %>% filter(c2_liab.pvalue <= 0.05) %>% group_by(functional_domain) %>% tally() %>% mutate(sig_num=n) %>% select(-n)

m_non_sig <- allphewas_both_functional_domain %>% filter(c2_liab.pvalue > 0.05) %>% group_by(functional_domain) %>% tally() %>% mutate(insig_num=n) %>% select(-n)


m_both <- m_non_sig %>% left_join(., m_sig, by='functional_domain')


 m_both[is.na(m_both)] <- 0
 
 m_both$pct_additive <-( m_both$insig_num / (m_both$sig_num + m_both$insig_num) ) * 100
 
  m_both$total <-( (m_both$sig_num + m_both$insig_num) ) 
 
 DT::datatable(round_df(m_both, digits=3)) 
 dataPvalue <- allphewas_both %>% select(c2_liab.pvalue) %>% filter(complete.cases(.))

 
 
 library(qvalue)
 
 qobj <- qvalue(dataPvalue$c2_liab.pvalue)


 summary(qobj)
## 
## Call:
## qvalue(p = dataPvalue$c2_liab.pvalue)
## 
## pi0: 0.5251091   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1  <1
## p-value      141    164   208    227   259  286 560
## q-value      134    155   195    221   254  286 560
## local FDR    117    132   151    171   184  201 367
 AverageEstimate(dataPvalue$c2_liab.pvalue )
## $pi0
## [1] 0.4747355
## 
## $significant
##   [1] 1 1 1 1 1 0 1 0 0 1 1 1 1 1 0 0 0 1 0 0 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0
##  [36] 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 1 1 1 1 1 0 1 1 1 0 0 0 1 1 1 1 0 0 0
##  [71] 0 0 0 1 0 1 1 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0
## [106] 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 1 0 1 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0
## [141] 0 0 1 0 0 0 1 1 1 0 0 1 1 0 0 1 0 0 1 0 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1
## [176] 1 0 1 1 1 0 1 1 0 1 1 1 1 0 0 1 0 1 0 1 1 1 0 1 1 1 0 0 0 1 1 1 1 1 1
## [211] 0 0 1 0 0 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0
## [246] 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 0 0 0 0
## [281] 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0
## [316] 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## [351] 1 1 1 0 1 1 1 1 0 0 0 0 0 0 1 0 1 0 1 1 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0
## [386] 0 1 1 1 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 1 0 1 0 1 1 1 0 1 1 1
## [421] 1 1 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0
## [456] 0 0 0 1 1 1 0 1 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 1 1 0 0 1 1 1 1
## [491] 0 0 0 1 1 0 0 0 1 1 0 1 0 1 0 0 0 1 0 0 0 0 0 1 1 1 0 1 0 1 0 0 0 0 1
## [526] 1 1 0 0 0 0 1 0 1 0 0 1 1 0 1 1 0 0 0 0 1 1 1 0 1 0 0 0 0 1 1 1 1 1 1

FDR Correction Phenotypes

DT::datatable(allphewas_both %>% filter( pvalue_h2_FDR <= 0.05) %>% tally() / 561)
DT::datatable(allphewas_both %>% filter(pvalue_h2_FDR > 0.05) %>% tally() / 561)
DT::datatable(allphewas_both %>% filter( pvalue_h2_FDR <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(pvalue_h2_FDR > 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter( pvalue_c2_FDR <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(pvalue_c2_FDR > 0.05) %>% tally())
DT::datatable(round_df(allphewas_both %>% filter( pvalue_h2_FDR <= 0.05) %>% tally() / 561, digits=3))
DT::datatable(round_df(allphewas_both %>% filter( pvalue_c2_FDR <= 0.05) %>% tally() / 561, digits=3))

Bonferroni Correction Phenotypes

DT::datatable(allphewas_both %>% filter( pvalue_h2_bonferroni <= 0.05) %>% tally() / 561)
DT::datatable(allphewas_both %>% filter(pvalue_h2_bonferroni > 0.05) %>% tally() / 561)
DT::datatable(allphewas_both %>% filter( pvalue_h2_bonferroni <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(pvalue_h2_bonferroni > 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter( pvalue_c2_bonferroni <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(pvalue_c2_bonferroni > 0.05) %>% tally())
DT::datatable(round_df(allphewas_both %>% filter( pvalue_h2_bonferroni <= 0.05) %>% tally() / 561, digits=3))
DT::datatable(round_df(allphewas_both %>% filter( pvalue_c2_bonferroni <= 0.05) %>% tally() / 561, digits=3))

FDR Correction beta age

DT::datatable(allphewas_both %>% filter( pvalue_beta_age_FDR <= 0.05) %>% tally())
DT::datatable(round_df(allphewas_both %>% filter( pvalue_beta_age_FDR <= 0.05) %>% tally() / 561, digits=3))

FDR Correction beta gender

DT::datatable(allphewas_both %>% filter( pvalue_beta_gender_FDR <= 0.05) %>% tally())
DT::datatable(round_df(allphewas_both %>% filter( pvalue_beta_gender_FDR <= 0.05) %>% tally() / 561, digits=3))

h2/c2/e2 for all phenotypes

phenotypes_h2_c2_e2 <- allphewas_both %>% select(phewas_code, phewas_description, h2_liab, h2_liab_SE, c2_liab, c2_liab_SE, e2, e2_SE) %>% 
  mutate(h2_low=h2_liab-1.96*h2_liab_SE,h2_high=h2_liab+1.96*h2_liab_SE ) %>%
  mutate(c2_low=c2_liab-1.96*c2_liab_SE,c2_high=c2_liab+1.96*c2_liab_SE ) %>%
  mutate(e2_low=e2-1.96*e2_SE,e2_high=e2+1.96*e2_SE ) %>%
  select(phewas_code, phewas_description, h2_liab,h2_low,h2_high, c2_liab, c2_low,c2_high, e2, e2_low,e2_high)


DT::datatable(round_df(phenotypes_h2_c2_e2, digits=3))

h2/c2/e2 for all phenotypes with p-value

phenotypes_h2_c2_e2_pvalue <- allphewas_both %>% select(phewas_code, phewas_description, h2_liab, h2_liab_SE, 
                                                 c2_liab, c2_liab_SE, e2, e2_SE,
                                                 pvalue_h2_FDR, h2_liab.zscore, pvalue_c2_FDR, c2_liab.zscore, e2.pvalue_FDR, e2.zscore, rliabos, rliabos_SE, rliabss, rliabss_SE) %>% 
  mutate(h2_low=h2_liab-1.96*h2_liab_SE,h2_high=h2_liab+1.96*h2_liab_SE ) %>%
  mutate(c2_low=c2_liab-1.96*c2_liab_SE,c2_high=c2_liab+1.96*c2_liab_SE ) %>%
  mutate(e2_low=e2-1.96*e2_SE,e2_high=e2+1.96*e2_SE ) %>%
  mutate(rliabos_low = rliabos - 1.96* rliabos_SE, rliabos_high = rliabos + 1.96* rliabos_SE  ) %>%
  mutate(rliabss_low = rliabss - 1.96* rliabss_SE, rliabss_high = rliabss + 1.96* rliabss_SE) %>%
  select(phewas_code, phewas_description, h2_liab,h2_low,h2_high, pvalue_h2_FDR, h2_liab.zscore,
         c2_liab, c2_low,c2_high,pvalue_c2_FDR, c2_liab.zscore, 
         e2, e2_low,e2_high, e2.pvalue_FDR, e2.zscore,
         rliabos, rliabos_low, rliabos_high, rliabss, rliabss_low, rliabss_high)


DT::datatable(round_df(phenotypes_h2_c2_e2_pvalue, digits = 3))

Comparison of h2/c2 to published literature

Load Comparison Data

twin_h2_c2_comparison_diff_twin_match <- readRDS(file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/MaTCHComparisonAnalysis/twin_h2_c2_comparison_diff_twin_match.rds')

Estimate Correlation Data

cor.test(twin_h2_c2_comparison_diff_twin_match$h2_liab, twin_h2_c2_comparison_diff_twin_match$h2_match,method = 'pearson')
## 
##  Pearson's product-moment correlation
## 
## data:  twin_h2_c2_comparison_diff_twin_match$h2_liab and twin_h2_c2_comparison_diff_twin_match$h2_match
## t = 4.633, df = 79, p-value = 1.399e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2712569 0.6181865
## sample estimates:
##       cor 
## 0.4622291
m <- as.matrix(twin_h2_c2_comparison_diff_twin_match %>% select(h2_liab,h2_liab_SE, h2_match, h2_match_SE))
estimateCorrelation_matrix(1:nrow(m),m)
## [1] 0.8168958
results <- jackknife(1:nrow(m),estimateCorrelation_matrix,m)
results$jack.se
## [1] 0.1652211
results$jack.bias
## [1] 0.06142777

Plot Scatterplot

#ggplot(twin_h2_c2_comparison_diff_twin_match , aes(h2_match,h2_liab)) + geom_point() + 
#    geom_errorbarh(aes(xmin=h2_match-1.96*h2_match_SE, xmax=h2_match+1.96*h2_match_SE),
#                   position = position_dodge(width = 0.01), alpha=.2) +
#      geom_errorbar(aes(ymin=h2_liab-1.96*h2_liab_SE, ymax=h2_liab+1.96*h2_liab_SE),
#                    position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) +
#    geom_smooth(method = "lm", se = TRUE) + xlab('Heritability - Published Studies') + ylab('Heritability - Insurance #Study')

Number of Traits

twin_h2_c2_comparison_diff_twin_match %>%
  mutate(diff=h2_match-h2_liab) %>%
  mutate(higherEstimate=ifelse(diff >=0, 'Published Is Higher', 'Claims is Higher')) %>%
  group_by(higherEstimate) %>%
  tally()
## # A tibble: 2 x 2
##   higherEstimate          n
##   <chr>               <int>
## 1 Claims is Higher       32
## 2 Published Is Higher    49
twinComparison_insurance_published <- twin_h2_c2_comparison_diff_twin_match %>% 
  select(-subchapter, -diff) %>%
  select(subchapterName,h2_insurance=h2_liab, 
         h2_insurance_SE=h2_liab_SE, h2_published=h2_match,
         h2_published_SE=h2_match_SE,source, method)


DT::datatable(twinComparison_insurance_published)
#library(googlesheets)


#boring_ss <- gs_new("published_estimate_comparison", 
#                    ws_title = "published_estimate_comparison", 
#                    input = round_df(twinComparison_insurance_published,digits = 3),
#                    trim = TRUE, verbose = FALSE)

Sibling Analysis

Number of Pairs by Gender

Load Sibling Data

library(readr)
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:DescTools':
## 
##     %like%
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
allphewas_binary <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twin_binary.csv", 
    " ", escape_double = FALSE, col_names = FALSE, 
    trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X57 = col_integer(),
##   X58 = col_integer(),
##   X59 = col_integer(),
##   X60 = col_integer(),
##   X61 = col_integer()
## )
## See spec(...) for full column specifications.
names(allphewas_binary) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/names_binary.rds')

phewas <- read.table("~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/phewasAllDescription.csv", header=TRUE)

allphewas_binary <- allphewas_binary %>% 
  mutate(h2_liab_SE_se = ifelse(phewas_code == '984',h2_SE,h2_liab_SE_se)) %>%
  mutate(rliabss_SE_se = ifelse(phewas_code == '984',r_SS_SE,rliabss_SE_se)) %>%
  mutate(c2_liab_SE_se = ifelse(phewas_code == '984',c2_SE,c2_liab_SE_se)) %>%
    mutate(rliabos_SE_se = ifelse(phewas_code == '984',r_OS_SE,rliabos_SE_se)) 


names(allphewas_binary) <- sub("_se", "", names(allphewas_binary))



allphewas_binary <- allphewas_binary %>% inner_join(phewas, ., by='phewas_code') 
## Warning: Column `phewas_code` joining factor and character vector, coercing
## into character vector
allphewas_sibling <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/data/allphewas_sibling.txt", 
    " ", escape_double = FALSE, col_names = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X31 = col_character()
## )
## See spec(...) for full column specifications.
names(allphewas_sibling) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/data/names_binary_sibling.rds')

names(allphewas_sibling) <- sub("_se", "", names(allphewas_sibling))


df1 <- allphewas_sibling %>% select(phewas_code, genderPairType,rliabsibling ) %>% spread(genderPairType,rliabsibling) %>%
  select(phewas_code, rliabsibling_SS=SS,rliabsibling_OS=OS)

df2 <- allphewas_sibling %>% select(phewas_code, genderPairType,rliabsibling_SE) %>% spread(genderPairType,rliabsibling_SE) %>%
    select(phewas_code, rliabsibling_SE_SS=SS,rliabsibling_SE_OS=OS)


allphewas_sibling_correlation <- df1 %>% inner_join(.,df2, by='phewas_code')

allphewas_binary_correlation <- allphewas_binary %>% select(phewas_code, phewas_description, rliabss, rliabss_SE,rliabos, rliabos_SE, h2_liab, h2_liab_SE, c2_liab, c2_liab_SE)


allphewas_all_correlation_sibling_twin <- allphewas_binary_correlation %>% left_join(.,allphewas_sibling_correlation, by='phewas_code')


rm(df1,df2)

Comparison of OS sibling correlation vs OS twin correlation

correlation_twinOS_sibOS <- ggplot(allphewas_all_correlation_sibling_twin , aes(rliabos,rliabsibling_OS)) + geom_point() + 
    geom_errorbarh(aes(xmin=rliabos-1.96*rliabos_SE, xmax=rliabos+1.96*rliabos_SE),
                   position = position_dodge(width = 0.01), alpha=.2) +
      geom_errorbar(aes(ymin=rliabsibling_OS-1.96*rliabsibling_SE_OS, ymax=rliabsibling_OS+1.96*rliabsibling_SE_OS),
                    position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) + 
  xlab('Twin Opposite Sex Correlation') + 
  ylab('Sibling Opposite Sex Correlation')

correlation_twinOS_sibOS
## Warning: position_dodge requires non-overlapping x intervals

ggsave(correlation_twinOS_sibOS, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/correlation_twinOS_sibOS.png')
## Saving 7 x 5 in image
## Warning: position_dodge requires non-overlapping x intervals

Comparison of OS sibling correlation vs SS twin correlation

correlation_twinOS_sibSS <- ggplot(allphewas_all_correlation_sibling_twin , aes(rliabos,rliabsibling_SS)) + geom_point() + 
    geom_errorbarh(aes(xmin=rliabos-1.96*rliabos_SE, xmax=rliabos+1.96*rliabos_SE),
                   position = position_dodge(width = 0.01), alpha=.2) +
      geom_errorbar(aes(ymin=rliabsibling_SS-1.96*rliabsibling_SE_SS, ymax=rliabsibling_SS+1.96*rliabsibling_SE_SS),
                    position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) + 
  xlab('Twin Opposite Sex Correlation') + 
  ylab('Sibling Same Sex Correlation')

correlation_twinOS_sibSS
## Warning: position_dodge requires non-overlapping x intervals

ggsave(correlation_twinOS_sibSS, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/correlation_twinOS_sibSS.png')
## Saving 7 x 5 in image
## Warning: position_dodge requires non-overlapping x intervals

Comparison of Sibling OS vs SS correlation

correlation_sibOS_sibSS <- ggplot(allphewas_sibling_correlation , aes(rliabsibling_SS,rliabsibling_OS)) + geom_point() + 
    geom_errorbarh(aes(xmin=rliabsibling_SS-1.96*rliabsibling_SE_SS, xmax=rliabsibling_SS+1.96*rliabsibling_SE_SS),
                   position = position_dodge(width = 0.01), alpha=.2) +
      geom_errorbar(aes(ymin=rliabsibling_OS-1.96*rliabsibling_SE_OS, ymax=rliabsibling_OS+1.96*rliabsibling_SE_OS),
                    position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) +
  xlab('Sibling Same Sex Correlation') + ylab('Sibling Opposite Sex Correlation')  +
  geom_smooth(method = "lm", se = TRUE)  +
    theme(axis.title.x = element_text(size=12), 
          axis.title.y = element_text(size=12)) 



correlation_sibOS_sibSS
## Warning: position_dodge requires non-overlapping x intervals

ggsave(correlation_sibOS_sibSS, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/correlation_sibOS_sibSS.png')
## Saving 7 x 5 in image
## Warning: position_dodge requires non-overlapping x intervals
allphewas_sibling_correlation_dff <- allphewas_sibling_correlation %>%
  mutate(rsib_diff=rliabsibling_SS-rliabsibling_OS)

diff_sib_corOS_corSS <- ggplot(allphewas_sibling_correlation_dff, aes(rsib_diff)) +
  geom_histogram(bins = 60) + xlab("Same Sex Correlation - Opposite Sex Correlation")

diff_sib_corOS_corSS

ggsave(diff_sib_corOS_corSS, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/diff_sib_corOS_corSS.png')
## Saving 7 x 5 in image

Number of Correlations which cross 0

allphewas_sibling_correlation <- allphewas_sibling_correlation %>%
  mutate(rsib_diff = rliabsibling_SS-rliabsibling_OS) %>%
  mutate(rsib_diff_SE = sqrt(rliabsibling_SE_SS^2+rliabsibling_SE_OS^2)) %>%
  mutate(rsib_diff_low = rsib_diff - 1.96*rsib_diff_SE,
         rsib_diff_high = rsib_diff + 1.96*rsib_diff_SE) %>%
  mutate(rsib_diff.zscore=rsib_diff/rsib_diff_SE) %>%
  mutate(rsib_diff_SE.pvalue=2*pnorm(-abs(rsib_diff.zscore))) %>%
  mutate(rsib_diff.pvalue_FDR = p.adjust(rsib_diff_SE.pvalue, method='BY')) 

Number of Phenotypes where 95% CI overlaps with 0

DT::datatable(allphewas_sibling_correlation %>% filter(rsib_diff_low < 0 & rsib_diff_high > 0) %>% tally())
DT::datatable(round_df(allphewas_sibling_correlation %>% filter(rsib_diff_low < 0 & rsib_diff_high > 0) %>% tally()/551,digits = 3))

Number of Phenotypes where p-value is less then 0.05

allphewas_sibling_correlation %>% filter(rsib_diff.pvalue_FDR <= 0.05) %>% tally()/561
##           n
## 1 0.3850267
DT::datatable(round_df(allphewas_sibling_correlation %>% filter(rsib_diff.pvalue_FDR <= 0.05) %>% tally()
, digits = 3))
DT::datatable(round_df(allphewas_sibling_correlation %>% filter(rsib_diff.pvalue_FDR <= 0.05) %>% tally()/561
, digits = 3))

Pi_0 Statistics

 qobj_sib <- qvalue(allphewas_sibling_correlation$rsib_diff_SE.pvalue)

summary(qobj_sib)
## 
## Call:
## qvalue(p = allphewas_sibling_correlation$rsib_diff_SE.pvalue)
## 
## pi0: 0.2363052   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1  <1
## p-value      152    190   271    302   336  361 551
## q-value      152    199   298    346   386  442 551
## local FDR    128    152   207    246   276  316 551
 AverageEstimate(allphewas_sibling_correlation$rsib_diff_SE.pvalue )
## $pi0
## [1] 0.2814059
## 
## $significant
##   [1] 0 1 0 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0
##  [36] 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
##  [71] 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 0 0 0 0 0 0
## [106] 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## [141] 0 1 0 1 1 1 0 1 0 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 0 1 0
## [176] 0 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1
## [211] 0 0 0 0 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 0 1 0 0
## [246] 1 1 1 0 0 1 0 1 1 1 1 1 1 1 0 0 1 1 0 1 0 1 1 1 1 1 0 0 1 0 1 1 0 0 0
## [281] 0 1 1 1 0 0 0 1 0 1 0 1 1 1 0 0 0 1 1 1 0 0 0 0 1 0 1 1 1 0 1 1 1 0 0
## [316] 1 0 0 1 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 0 1 1 0 1 0 1 1 1 1 0 0 0 0 1
## [351] 0 1 1 1 1 0 0 0 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0
## [386] 1 0 1 1 1 1 1 1 0 0 1 0 1 1 1 1 1 0 1 1 1 0 0 1 1 0 0 0 0 1 0 0 0 0 1
## [421] 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 0
## [456] 0 0 1 1 1 0 1 1 1 1 0 0 0 1 1 0 0 0 0 1 1 1 1 1 1 0 0 1 1 0 1 0 1 1 1
## [491] 1 1 1 1 1 1 0 0 1 1 1 1 0 1 1 1 1 0 1 1 0 1 0 1 0 1 1 1 1 1 1 1 1 1 1
## [526] 0 1 1 1 1 0 1 1 1 0 1 0 1 0 1 0 0 1 1 1 0 1 1 0 0 0

Meta-Analysis

Create Sibling Functional Domain Data Frame

allphewas_both_functional_domain_sibling <- allphewas_all_correlation_sibling_twin %>% 
  inner_join(.,phewas_functional_domain_map, by=c('phewas_code', 'phewas_description'))
## Warning: Column `phewas_description` joining factor and character vector,
## coercing into character vector

rsibSS

zz <- allphewas_both_functional_domain_sibling %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabsibling_SS,sei=rliabsibling_SE_SS,data=., method='DL'))


phewas_b_rsib_SS <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabsibling_SS,1:ncol(.))

phewas_se_rsib_SS <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabsibling_SE_SS,1:ncol(.))

phewas_meta_rsib_SS_liab_all <- phewas_b_rsib_SS %>% inner_join(.,phewas_se_rsib_SS, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_rsib_SS_liab_all) <- c('functional_domain','rsibSS','rsibSS_SE', 'category')


ribSSOverall <- phewas_meta_rsib_SS_liab_all %>%
  mutate(rsibSS_low=rsibSS-1.96*rsibSS_SE, rsibSS_high=rsibSS+1.96*rsibSS_SE )


DT::datatable(round_df(ribSSOverall, digits=3))
#MaTCH_h2 <- functional_domain_df %>% select(functional_domain, h2_corr, h2_corr_SE) %>% mutate(category = 'MaTCH')

#names(MaTCH_h2) <- c('functional_domain','h2','h2_SE', 'category')


#metaAll_justMatch_h2 <- phewas_meta_h2_liab_all %>% rbind(.,MaTCH_h2) 


#metaAll_justMatch_h2 <- metaAll_justMatch_h2 %>%
#  mutate(h2_low = h2-1.96*h2_SE, h2_high = h2+1.96*h2_SE )

#DT::datatable(round_df(metaAll_justMatch_h2, digits=3))

#rm(zz, phewas_b_h2, phewas_se_h2, MaTCH_h2)

rsibOS

zz <- allphewas_both_functional_domain_sibling %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabsibling_OS,sei=rliabsibling_SE_OS,data=., method='DL'))


phewas_b_rsib_OS <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabsibling_OS,1:ncol(.))

phewas_se_rsib_OS <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabsibling_SE_OS,1:ncol(.))

phewas_meta_rsib_OS_liab_all <- phewas_b_rsib_OS %>% inner_join(.,phewas_se_rsib_OS, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_rsib_OS_liab_all) <- c('functional_domain','rsibOS','rsibOS_SE', 'category')


ribOSOverall <- phewas_meta_rsib_OS_liab_all %>%
  mutate(rsibOS_low=rsibOS-1.96*rsibOS_SE, rsibOS_high=rsibOS+1.96*rsibOS_SE )


DT::datatable(round_df(ribOSOverall, digits=3))
#MaTCH_h2 <- functional_domain_df %>% select(functional_domain, h2_corr, h2_corr_SE) %>% mutate(category = 'MaTCH')

#names(MaTCH_h2) <- c('functional_domain','h2','h2_SE', 'category')


#metaAll_justMatch_h2 <- phewas_meta_h2_liab_all %>% rbind(.,MaTCH_h2) 


#metaAll_justMatch_h2 <- metaAll_justMatch_h2 %>%
#  mutate(h2_low = h2-1.96*h2_SE, h2_high = h2+1.96*h2_SE )

#DT::datatable(round_df(metaAll_justMatch_h2, digits=3))

#rm(zz, phewas_b_h2, phewas_se_h2, MaTCH_h2)

Barplot by functional group - including sibling

phewas_meta_h2_liab_all$stat <- 'h2'
phewas_meta_c2_liab_all$stat <- 'c2'
#phewas_meta_e2_liab_all$stat <- 'e2'
phewas_meta_rss_liab_all$stat <- 'rtwinOS'
phewas_meta_ros_liab_all$stat <- 'rtwinSS'

phewas_meta_rsib_SS_liab_all$stat <- 'rsibSS'
phewas_meta_rsib_OS_liab_all$stat <- 'rsibOS'

a <- phewas_meta_h2_liab_all %>% select(functional_domain, statistic = h2, stat)
b <- phewas_meta_c2_liab_all %>% select(functional_domain, statistic = c2, stat)
#c <- phewas_meta_e2_liab_all %>% select(functional_domain, statistic = e2, stat)
d <- phewas_meta_ros_liab_all %>% select(functional_domain, statistic = rOS, stat)
e <- phewas_meta_rss_liab_all %>% select(functional_domain, statistic = rSS, stat)

f <- phewas_meta_rsib_SS_liab_all %>% select(functional_domain, statistic = rsibSS, stat)
g <- phewas_meta_rsib_OS_liab_all %>% select(functional_domain, statistic = rsibOS, stat)



z_meta_all <- a %>% bind_rows(.,b,d,e,f,g)

rm(a,b,c,d,e,g,f)
## Warning in rm(a, b, c, d, e, g, f): object 'c' not found
z_meta_all$stat <- factor(z_meta_all$stat, levels=c('rtwinOS','rsibOS','rtwinSS','rsibSS', 'h2','c2'))


z_meta_all$functional_domain <- factor(z_meta_all$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal',
                                                            'Quantitative',
                                                            'Avg. Monthly Cost',
                                                            'PheWAS Comorbids'))



barchart_fn_domain <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) + 
  geom_bar(stat="identity", position="dodge", width = 0.75, colour="black") +
  theme(axis.text.x=element_text(angle=45, size = 8),
        axis.title.x = element_text(vjust=10)) +
ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic') +
  guides(colour = guide_legend(nrow = 1))


barchart_fn_domain

ggsave(barchart_fn_domain, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/barchart_fn_domain.png')
## Saving 7 x 5 in image
braden_df_meta_analytic_estimates_noenvironment <- z_meta_all

Other Plots

Marginal Plots / Scatter plots

allData_meta_all <- allphewas_both %>% 
  mutate(diff_rss_ros = rliabss-rliabos) %>% mutate(h2_upper = 2*rliabos) %>%
  mutate(rOS=rliabos, rSS = rliabss)

scatter_h2_c2 <-  ggplot(allData_meta_all, aes(h2, c2)) + 
  geom_point(aes(colour = c('#008FD5'))) +  
  xlab('h2') +
  ylab('c2') + 
  theme(legend.position="none")  +
  geom_text(aes(label=ifelse(phewas_code == '984' |  phewas_code == '313.1' |
                               phewas_code == '134577' | 
                               phewas_code == '495' | 
                               phewas_code == '637' | 
                               phewas_code == '464' 
                               ,shortName,'')),hjust=0,vjust=0,fontface = "bold", size = 2)

scatter_h2_c2
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text).
scatter_h2_c2 <- ggMarginal(scatter_h2_c2)
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_text).
scatter_h2_c2

ggsave(scatter_h2_c2, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/scatter_h2_c2.png', width = 5, height = 3)





#scatter_h2_e2 <-  ggplot(allData_meta_all, aes(h2, e2)) + 
#  geom_point(aes(colour = c('#008FD5'))) +  
#  xlab('h2') +
#  ylab('e2') + 
#  theme(legend.position="none")  +
#    geom_text(aes(label=ifelse(phewas_code == '313.1' | phewas_code == '134577' | phewas_code == '495' | 
#                                 phewas_code == '871.1' |  phewas_code == '800.3' | phewas_code =='726.3'
#                               ,shortName,'')),hjust=0,vjust=0,fontface = "bold", size = 2)



  
#scatter_h2_e2 <- ggMarginal(scatter_h2_e2)


scatter_h2_h2_upper <-  ggplot(allData_meta_all, aes(h2, h2_upper)) + 
  geom_point(aes(colour = c('#008FD5'))) +  
  xlab('h2') +
  ylab('h2_upper (2*rOS)') + 
  theme(legend.position="none") +
    geom_text(aes(label=ifelse(phewas_code == '984' |  phewas_code == '313.1' |
                               phewas_code == '134577' | 
                               phewas_code == '495' | 
                               phewas_code == '637' | 
                               phewas_code == '464' 
                               ,shortName,'')),hjust=0,vjust=0,fontface = "bold", size = 2)



  
scatter_h2_h2_upper <- ggMarginal(scatter_h2_h2_upper)
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_text).
scatter_ros_rss <-  ggplot(allData_meta_all, aes(rOS, rSS)) + 
  geom_point(aes(colour = c('#008FD5'))) +  
  xlab('rtwinOS') +
  ylab('rtwinSS') + 
  theme(legend.position="none") +
      geom_text(aes(label=ifelse(phewas_code == '362.1' | phewas_code == '464' | 
                                   phewas_code == '134577' | phewas_code == '696.4' | phewas_code == '313.1'  |
                                   phewas_code == '871.1',
                                 shortName,'')),hjust=0,vjust=0,fontface = "bold", size = 2, nudge_x=-.01)



  
scatter_ros_rss <- ggMarginal(scatter_ros_rss) 
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_text).
ggsave(scatter_ros_rss, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/scatter_ros_rss.png',width = 5, height = 3)




scatter_h2_c2_ros_rss <- ggarrange(scatter_h2_c2,scatter_h2_h2_upper ,scatter_ros_rss, 
          labels = c("a)", "b)", "c)"),
          ncol = 2, nrow = 2)

scatter_h2_c2_ros_rss

ggsave(scatter_h2_c2_ros_rss, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/scatter_h2_c2_ros_rss.png')
## Saving 7 x 5 in image

Cost CDF/ Count CDF/h2,c2,ros,rss of cost and cnt

cost_comorbid_pheno <- allphewas_both %>% 
  dplyr::filter(phewas_code == 'COST' | phewas_code == 'CNT')

cost_comorbid_pheno$Description <- c('Number PheWAS Comorbidities','Avg. Monthly Cost')


cost_comorbid_pheno <- cost_comorbid_pheno %>% select(phewas_code, Description, rss01=rliabss,rliabss_SE, ros01=rliabos,rliabos_SE,h2_liab, h2_liab_SE, c2_liab, c2_liab_SE)

cost_comorbid_pheno_rss <- cost_comorbid_pheno[,c('phewas_code', 'Description','rss01','rliabss_SE')]  %>% 
  mutate(statisticType = 'rSS')

cost_comorbid_pheno_ros <- cost_comorbid_pheno %>% 
  select(phewas_code, Description,ros01,rliabos_SE) %>%
  mutate(statisticType = 'rOS')

cost_comorbid_pheno_h2 <- cost_comorbid_pheno %>% 
  select(phewas_code, Description,h2_liab,h2_liab_SE) %>%
  mutate(statisticType = 'h2')

cost_comorbid_pheno_c2 <- cost_comorbid_pheno %>% 
  select(phewas_code, Description,c2_liab,c2_liab_SE) %>%
  mutate(statisticType = 'c2')

names(cost_comorbid_pheno_rss) <- c('phewas_code','Description','statistic','standard_error','statisticType' )
names(cost_comorbid_pheno_ros) <- c('phewas_code','Description','statistic','standard_error','statisticType')
names(cost_comorbid_pheno_h2) <- c('phewas_code','Description','statistic','standard_error','statisticType')
names(cost_comorbid_pheno_c2) <- c('phewas_code','Description','statistic','standard_error','statisticType')

cost_comorbid_pheno_long <- cost_comorbid_pheno_rss %>% 
  rbind(.,cost_comorbid_pheno_ros) %>% 
  rbind(.,cost_comorbid_pheno_h2) %>%
  rbind(.,cost_comorbid_pheno_c2)


cost_comorbid_ggplot <- ggplot(cost_comorbid_pheno_long, aes(y=statistic, x=as.factor(Description), colour=statisticType)) + 
  geom_point(position = position_dodge(width = .5)) +  
  geom_errorbar(aes(ymin=statistic-1.96*standard_error, ymax=statistic+1.96*standard_error),position = position_dodge(width = 0.5)) + 
  theme(axis.text=element_text(size=8))  + 
  scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
  theme(axis.title = element_text()) + ylab('Statistic') + xlab('Phenotype') + labs(color='Statistic Type')

cost_comorbid_ggplot

ggsave(cost_comorbid_ggplot, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/cost_comorbid_ggplot.png')
## Saving 7 x 5 in image
costDistribution <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/costDistribution.csv")
## Parsed with column specification:
## cols(
##   cost = col_double()
## )
comorbidDistribution <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/comorbidDistribution.csv")
## Parsed with column specification:
## cols(
##   comorbids = col_integer()
## )
costggplotCDF <- ggplot(costDistribution %>% filter(cost >=0), aes(cost )) + stat_ecdf(geom = "step", pad = 'FALSE') + 
  ylab('Percentage of Twins') + xlab('Average Monthly Cost')


comorbidggplotCDF <- ggplot(comorbidDistribution, aes(comorbids )) + stat_ecdf(geom = "step", pad = 'FALSE') +
  ylab('Percentage of Twins') + xlab('Number of Comorbids')




cdf_cost_comorbid <- ggarrange(costggplotCDF,comorbidggplotCDF,labels = c( "a)", 'b)'))

cdf_cost_comorbid_bar_chart <- ggarrange(cdf_cost_comorbid,cost_comorbid_ggplot,labels = c( "c)", ''), nrow=2)

cdf_cost_comorbid_bar_chart

ggsave(cdf_cost_comorbid_bar_chart,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/cdf_cost_comorbid_bar_chart.png', height = 8, width=8)


cost_comorbid_pheno_table <- cost_comorbid_pheno %>% 
  mutate(rss_low=rss01-1.96*rliabss_SE,rss_high=rss01+1.96*rliabss_SE ) %>%
  mutate(ros_low=ros01-1.96*rliabos_SE,ros_high=ros01+1.96*rliabos_SE ) %>%
  mutate(h2_low=h2_liab-1.96*h2_liab_SE,h2_high=h2_liab+1.96*h2_liab_SE ) %>%
  mutate(c2_low=c2_liab-1.96*c2_liab_SE,c2_high=c2_liab+1.96*c2_liab_SE ) %>%
select(phewas_code, Description,rss01, rss_low,rss_high, ros01, ros_low, ros_high, h2_liab, h2_low, h2_high,c2_liab, c2_low, c2_high)

DT::datatable(round_df(cost_comorbid_pheno_table, digits=3))

Twin Year of Birth Analysis

twinPairsYOB <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twinPairsYOB.csv", 
    "\t", escape_double = FALSE, col_types = cols(phewas_code = col_character()), 
    trim_ws = TRUE)


twinPairsYOB <- twinPairsYOB %>% inner_join(.,quant_pheno_description, by='phewas_code')


twinPairsYOB <- twinPairsYOB %>% mutate(Description = ifelse(phewas_code == 'COST','Binary Phenotypes', Description))

ggplot_ageDistribution <- ggplot(twinPairsYOB, aes(YOB, fill = Description, colour = Description)) + 
  geom_density(alpha = 0.1) + facet_wrap(~Description) +
  theme(axis.text.x=element_text(angle=45, size = 8),
        axis.title.x = element_text(vjust=2)) +
ylab('Density') + xlab('Year of Birth') +
theme(legend.position="none")

  


ggplot_ageDistribution

ggsave(ggplot_ageDistribution, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/ggplot_ageDistribution.png')
## Saving 7 x 5 in image

Volcano Plot

thresh_h2 <- allphewas_both_zipcode %>% filter(pvalue_h2_FDR <= 0.05) %>% summarise(val=min(observed_h2))
thresh_c2 <- allphewas_both_zipcode %>% filter(pvalue_c2_FDR <= 0.05) %>% summarise(val=min(observed_c2))
thresh_e2 <- allphewas_both_zipcode %>% filter(e2.pvalue_FDR <= 0.05) %>% summarise(val=min(observed_e2))

thresh_rliabincome <- allphewas_both_zipcode %>% 
  filter(rliabincome.pvalue_FDR <= 0.05) %>% summarise(val=min(observed_rliabincome))

thresh_rliabaqi <- allphewas_both_zipcode %>% 
  filter(rliabaqi.pvalue_FDR <= 0.05) %>% summarise(val=min(observed_rliabaqi))

thresh_rliabtemp <- allphewas_both_zipcode %>% 
  filter(rliabtemp.pvalue_FDR <= 0.05) %>% summarise(val=min(observed_rliabtemp))



fullData_binary_quant <- allphewas_both_zipcode %>% 
  mutate(twinPrevalenceCategory = as.character(cut(prevalence, breaks=c(0,.10,.30,.80),include.lowest=TRUE,right=FALSE))) %>%
  replace_na(list(twinPrevalenceCategory='Quantitative Trait')) %>%
  mutate(h2_rank = rank(-h2_liab.zscore ,ties.method='random')) %>%
  mutate(c2_rank = rank(-c2_liab.zscore,ties.method='random')) %>%
  mutate(e2_rank = rank(-e2.zscore,ties.method='random')) %>%
  mutate(rliabincome_rank = rank(-rliabincome.zscore,ties.method='random')) %>%
  mutate(rliabaqi_rank = rank(-rliabaqi.zscore,ties.method='random')) %>%
  mutate(rliabtemp_rank = rank(-rliabtemp.zscore,ties.method='random')) %>%
  mutate(h2_rank_effect = rank(-h2_liab ,ties.method='random')) %>%
  mutate(c2_rank_effect = rank(-c2_liab,ties.method='random')) %>%
  mutate(e2_rank_effect = rank(-e2,ties.method='random')) %>%
  mutate(rliabincome_rank_effect = rank(-rliabincome,ties.method='random')) %>%
  mutate(rliabaqi_rank_effect = rank(-rliabaqi,ties.method='random')) %>%
  mutate(rliabtemp_rank_effect = rank(-rliabtemp,ties.method='random')) %>%
  mutate(h2_label = ifelse(h2_rank <= 5 | h2_rank_effect <= 2,shortName,'')) %>%
  mutate(c2_label = ifelse(c2_rank <= 5 |c2_rank_effect <= 2  ,shortName,'')) %>%
  mutate(rliabincome_label = ifelse(rliabincome_rank <= 5 | rliabincome_rank_effect <= 2,shortName,'')) %>%
  mutate(rliabaqi_label = ifelse(rliabaqi_rank <= 5 | rliabaqi_rank_effect <= 2 ,shortName,'')) %>%
  mutate(rliabtemp_label = ifelse(rliabtemp_rank <= 5 | rliabtemp_rank_effect <= 2, shortName,'')) 





unique(fullData_binary_quant$twinPrevalenceCategory)
## [1] "[0,0.1)"            "[0.1,0.3)"          "[0.3,0.8]"         
## [4] "Quantitative Trait"
fullData_binary_quant$twinPrevalenceCategory <-factor(fullData_binary_quant$twinPrevalenceCategory, 
                                                 levels = c('[0,0.1)','[0.1,0.3)','[0.3,0.8]','Quantitative Trait'))





alphaLevel <- .6

library(RColorBrewer)

myColors <- c('lightsalmon1','blue','green', 'yellow2')
names(myColors) <- levels(fullData_binary_quant$twinPrevalenceCategory)



p1 <-  ggplot(fullData_binary_quant, aes(h2_liab,observed_h2, label=h2_label)) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Heritability') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = 3) 









p2 <-  ggplot(fullData_binary_quant, aes(c2_liab,observed_c2, label=c2_label)) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Shared Environmental Variance') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = 3) 







#p3 <-  ggplot(fullData_binary_quant, aes(e2,observed_e2)) + 
#  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) + 
#  xlab('Residual Variance') + ylab('-log10(p)') +
#  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed")  +     
#    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
#  geom_text(position='jitter',aes(label=ifelse(( e2_rank ==1  | e2_rank == 2 |  
 #                                                  e2_rank ==5 | e2_rank==7  ),shortName,'')),hjust=1,vjust=1,fontface = "bold", size = 3) +
#      geom_text(position='jitter',aes(label=ifelse((e2_rank == 3 ),shortName,'')),hjust=0,vjust=1,fontface = "bold", size = 3) 





p4 <-  ggplot(fullData_binary_quant, aes(rliabincome,observed_rliabincome, label=rliabincome_label) ) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Socioeconomic Status Variance Component') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = 3) 



p5 <-  ggplot(fullData_binary_quant, aes(rliabaqi,observed_rliabaqi, label=rliabaqi_label) ) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
xlab('Monthly AQI Variance Component') + ylab('-log10(p)') +
    geom_text_repel( fontface = "bold", size = 3) 




p6 <-  ggplot(fullData_binary_quant, aes(rliabtemp,observed_rliabtemp, label=rliabtemp_label) ) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Monthly Temperature Variance Component') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = 3) 




volcano_plot <- ggarrange(p1,p2, p4 , p5, p6, labels = c( 'b)', 'c)','d)', 'e)' ,'f)' ), common.legend = TRUE, nrow=3,ncol = 2, legend  = 'bottom')
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
#barchart_fn_domain

#volcano_bar_chart <- ggarrange(barchart_fn_domain,volcano_plot, labels=c('a)',''), nrow = 2,heights   = 1:2)

#volcano_bar_chart

#ggsave(volcano_bar_chart,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/volcano_bar_chart.png', height = 12, width = 10)
df_tttt <- fullData_binary_quant %>% select(phewas_code, phewas_description, rliabincome,rliabincome_SE, rliabincome.pvalue, rliabincome.pvalue_FDR, rliabincome.zscore, rliabincome_rank)

Figures

Figure 1

map_prevalence_data <- readRDS(all,file = '~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/maps/map_prevalence_data.rds')

pp <- ggplot() +
  geom_polygon(data = map_prevalence_data, 
               aes(x = long, y = lat, group = group, fill = numPairs), 
               color = "black", size = 0.25) + 
  coord_map() +
  labs(fill = "Number of Twin Pairs") + xlim(-140,-66) + theme_map() +
  theme(legend.text=element_text(size=8)) 

pp

df_Pop <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/SES_Analysis/popDensity_twin_ACS.rds')

df_SES <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/SES_Analysis/SES_twin_ACS.rds')



popDensityPlot <- ggplot(df_Pop, aes(x=densityBin, y=normalizedPop)) + 
  geom_bar(stat='identity', aes(fill = cohort))  + 
  facet_wrap(~cohort,nrow = 1) +
  theme(axis.text.x = element_text(angle = 60, size=1), 
        axis.text.y = element_text(size=10),
        axis.title.x = element_text(size=10),
        axis.title.y = element_text(size=10)) + 
  theme(legend.text=element_text(size=10)) +
  xlab("Log of Population Per Square Mile Area") + 
  ylab("% Total Population")




sesDensityPlot <- ggplot(df_SES, aes(x=SES_bin, y=normalizedPop)) + 
  geom_bar(stat='identity', aes(fill = cohort))  + 
  facet_wrap(~cohort,nrow = 1) +
  theme(axis.text.x = element_text(angle = 60, size=1), 
        axis.text.y = element_text(size=10),
        axis.title.x = element_text(size=10),
        axis.title.y = element_text(size=10)) + 
    theme(legend.text=element_text(size=10)) +
  xlab("Depravity Index Score") + ylab("% Total Population") 




figure1a <- ggarrange(popDensityPlot,sesDensityPlot,labels = c( 'b)','c)'),  
                      common.legend=TRUE, ncol = 2, legend = 'none', align = 'hv',
                      font.label = list(size = 8))

figure1 <- ggarrange(pp,figure1a,labels = c( "a)", ''),  common.legend=FALSE, nrow = 2, ncol = 1,legend = 'top',
                     font.label = list(size = 8))

figure1

ggsave(figure1,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1.png')
## Saving 7 x 5 in image

Figure 2

barchart_fn_domain <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) + 
  geom_bar(stat="identity", position="dodge", width = 0.75, colour="black") +
  theme(axis.text.x=element_text(angle=45, size = 8),
        axis.title.x = element_text(vjust=10)) +
ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic') +
  theme(legend.position="bottom",legend.box = "horizontal")

barchart_fn_domain

alphaLevel <- .6

library(RColorBrewer)

myColors <- c('lightsalmon1','blue','green', 'yellow2')
names(myColors) <- levels(fullData_binary_quant$twinPrevalenceCategory)



p1 <-  ggplot(fullData_binary_quant, aes(h2_liab,observed_h2, label=h2_label)) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Heritability') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = 3) 









p2 <-  ggplot(fullData_binary_quant, aes(c2_liab,observed_c2, label=c2_label)) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Shared Environmental Variance') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = 3) 







#p3 <-  ggplot(fullData_binary_quant, aes(e2,observed_e2)) + 
#  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) + 
#  xlab('Residual Variance') + ylab('-log10(p)') +
#  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed")  +     
#    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
#  geom_text(position='jitter',aes(label=ifelse(( e2_rank ==1  | e2_rank == 2 |  
 #                                                  e2_rank ==5 | e2_rank==7  ),shortName,'')),hjust=1,vjust=1,fontface = "bold", size = 3) +
#      geom_text(position='jitter',aes(label=ifelse((e2_rank == 3 ),shortName,'')),hjust=0,vjust=1,fontface = "bold", size = 3) 





p4 <-  ggplot(fullData_binary_quant, aes(rliabincome,observed_rliabincome, label=rliabincome_label) ) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Socioeconomic Status Variance Component') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = 3) 



p5 <-  ggplot(fullData_binary_quant, aes(rliabaqi,observed_rliabaqi, label=rliabaqi_label) ) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
xlab('Monthly AQI Variance Component') + ylab('-log10(p)') +
    geom_text_repel( fontface = "bold", size = 3) 




p6 <-  ggplot(fullData_binary_quant, aes(rliabtemp,observed_rliabtemp, label=rliabtemp_label) ) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Monthly Temperature Variance Component') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = 3) 




volcano_plot <- ggarrange(p1,p2, p4 , p5, p6, labels = c( 'b)', 'c)','d)', 'e)' ,'f)' ), common.legend = TRUE, nrow=3,ncol = 2, legend  = 'bottom')
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
barchart_fn_domain

volcano_bar_chart <- ggarrange(barchart_fn_domain,volcano_plot, labels=c('a)',''), nrow = 2,heights   = 1:2)

volcano_bar_chart

ggsave(volcano_bar_chart,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure2.png', height = 12, width = 10)

Figure 3

cost_comorbid_pheno_long <- cost_comorbid_pheno_long %>%
  mutate(statisticType = ifelse(statisticType == 'rSS', 'rtwinSS',statisticType )) %>%
  mutate(statisticType = ifelse(statisticType == 'rOS', 'rtwinOS',statisticType ))


cost_comorbid_ggplot <- ggplot(cost_comorbid_pheno_long, aes(x=as.factor(Description), y=statistic, fill=statisticType)) + 
  geom_bar(stat="identity", color="black", 
           position=position_dodge()) +
  geom_errorbar(aes(ymin=statistic-1.96*standard_error, 
                    ymax=statistic+1.96*standard_error),
                width=.4,
                position=position_dodge(.9)) +
    scale_x_discrete(labels = function(x) str_wrap(x, width = 15)) +
  coord_flip() +
  theme(axis.title = element_text()) + ylab('Statistic Value') + xlab('Phenotype') + labs(color='Statistic Type') +
  theme(legend.position="bottom")

cost_comorbid_ggplot

#cost_comorbid_ggplot <- ggplot(cost_comorbid_pheno_long, aes(y=statistic, x=as.factor(Description), colour=statisticType)) + 
#  geom_point(position = position_dodge(width = .5)) +  
#  geom_errorbar(aes(ymin=statistic-1.96*standard_error, ymax=statistic+1.96*standard_error),) + 
#  theme(axis.text=element_text(size=8))  + 
#  scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
#  theme(axis.title = element_text()) + ylab('Statistic') + xlab('Phenotype') + labs(color='Statistic Type')

#cost_comorbid_ggplot

comparisonplot_publishedLit <- ggplot(twin_h2_c2_comparison_diff_twin_match , aes(h2_match,h2_liab)) + geom_point() + 
    geom_errorbarh(aes(xmin=h2_match-1.96*h2_match_SE, xmax=h2_match+1.96*h2_match_SE),
                   position = position_dodge(width = 0.01), alpha=.2) +
      geom_errorbar(aes(ymin=h2_liab-1.96*h2_liab_SE, ymax=h2_liab+1.96*h2_liab_SE),
                    position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) +
    geom_smooth(method = "lm", se = TRUE) + xlab('Heritability - Published Studies') + ylab('Heritability - Insurance Study')



figure3 <- ggarrange(comparisonplot_publishedLit,cost_comorbid_ggplot,labels = c( "a)", 'b)'), nrow=2)
## Warning: position_dodge requires non-overlapping x intervals
figure3

ggsave(figure3,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure3.png',height = 12, width = 10)

ggsave(comparisonplot_publishedLit,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/comparisonplot_publishedLit.png',height = 6, width = 10)
## Warning: position_dodge requires non-overlapping x intervals

Figure 4

h2_liab_ggplot <- ggplot(metaAll_justMatch_h2, aes(y=h2, x=functional_domain, colour=category)) + 
  geom_point(position = position_dodge(width = .5)) +  
  geom_errorbar(aes(ymin=h2-1.96*h2_SE, ymax=h2+1.96*h2_SE),position = position_dodge(width = 0.5)) + 
  coord_flip() +  
  theme(axis.text=element_text(size=8))  + 
  scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
  theme(axis.title = element_text()) + ylab('Heritability (h2)') + xlab('Functional Domain')


ggsave(h2_liab_ggplot,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/h2_liab_comparison_match.png', height=3.5, width=6)
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
c2_liab_ggplot <- ggplot(metaAll_justMatch_c2, aes(y=c2, x=functional_domain, colour=category)) + 
  geom_point(position = position_dodge(width = .5)) +  
  geom_errorbar(aes(ymin=c2-1.96*c2_SE, ymax=c2+1.96*c2_SE),position = position_dodge(width = 0.5)) + 
  coord_flip() +     
  theme(axis.text=element_text(size=8))  + 
  scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
  theme(axis.title = element_text()) + ylab('Shared Environmental Variance (c2)') + xlab('Functional Domain')



ggsave(c2_liab_ggplot,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/c2_liab_comparison_match.png', height=3.5, width=6)
## Warning: Removed 3 rows containing missing values (geom_point).

## Warning: Removed 3 rows containing missing values (geom_errorbar).
figure4 <- ggarrange(h2_liab_ggplot,c2_liab_ggplot,labels = c( "a)", 'b)'), nrow = 2, ncol = 1, common.legend=TRUE,legend='bottom' )
## Warning: Removed 3 rows containing missing values (geom_point).

## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
#h2_c2_MaTCH
figure4

ggsave(figure4,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure4.png', height = 10, width=8)

Extra Stuff

How h2/c2 estimates change when using different values of p

p_low <- 0.330
p_high <- 0.625

comparison_h2_c2_p <- allphewas_both %>%
  mutate(h2_low = (2/p_low)*(rliabss-rliabos),
         h2_high= (2/p_high)*(rliabss-rliabos),
         c2_low = ((p_low+1)*rliabos-rliabss)/p_low,
         c2_high = ((p_high+1)*rliabos-rliabss)/p_high) %>%
  select(h2_liab, c2_liab, h2_low, h2_high, c2_low, c2_high) %>%
  mutate(diff_h2_low = h2_liab- h2_low, 
         diff_h2_high=h2_liab- h2_high,
         diff_c2_low = c2_liab- c2_low, 
         diff_c2_high=c2_liab- c2_high) 
  
mean(comparison_h2_c2_p$diff_h2_low, na.rm=TRUE)
## [1] -0.09382642
mean(comparison_h2_c2_p$diff_h2_high, na.rm=TRUE)
## [1] 0.10201
mean(comparison_h2_c2_p$diff_c2_low, na.rm=TRUE)
## [1] 0.04691321
mean(comparison_h2_c2_p$diff_c2_high, na.rm=TRUE)
## [1] -0.05100501

Weighted Analysis

phewas <- read.table("~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/phewasAllDescription.csv", header=TRUE)


allphewas_binary_weighting <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twin_binary_weighted.csv", 
    " ", escape_double = FALSE, col_names = FALSE, 
    trim_ws = TRUE)

names(allphewas_binary_weighting) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/names_binary_weighting.rds')


phewas_high_K <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/phewas_high_K.rds')


allphewas_binary_weighting <- allphewas_binary_weighting %>% 
  mutate(h2_liab_SE_se = ifelse(phewas_code == '984',h2_SE,h2_liab_SE_se)) %>%
  mutate(rliabss_SE_se = ifelse(phewas_code == '984',r_SS_SE,rliabss_SE_se)) %>%
  mutate(c2_liab_SE_se = ifelse(phewas_code == '984',c2_SE,c2_liab_SE_se)) %>%
    mutate(rliabos_SE_se = ifelse(phewas_code == '984',r_OS_SE,rliabos_SE_se)) 


names(allphewas_binary_weighting) <- sub("_se", "", names(allphewas_binary_weighting))



allphewas_binary_weighting <- allphewas_binary_weighting %>% left_join(phewas, ., by='phewas_code')  %>% right_join(.,phewas_high_K, by='phewas_code')



missingPhenoWeighting <- allphewas_binary_weighting %>% 
  filter(is.na(h2_liab)) %>% select(phewas_code)


saveRDS(missingPhenoWeighting, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/orchestraAnalysis/missingPhenoWeighting.rds' )

missingPhenoWeighting


dfWeight <- allphewas_binary_weighting %>% 
  select(phewas_code,phewas_description ,h2_liab_weight=h2_liab, h2_liab_SE_weight=h2_liab_SE, 
         c2_liab_weight=c2_liab, c2_liab_SE_weight=c2_liab_SE)


dfunWeight <- allphewas_both %>% 
  select(phewas_code ,h2_liab, h2_liab_SE, c2_liab, c2_liab_SE)

df_weight_unweight <- dfWeight %>% inner_join(.,dfunWeight, by ='phewas_code')




 ggplot(df_weight_unweight , aes(h2_liab_weight,h2_liab)) + geom_point() + 
    geom_smooth(method = "lm", se = TRUE) + geom_abline(slope = 1, intercept = 0)

 
  ggplot(df_weight_unweight , aes(c2_liab_weight,c2_liab)) + geom_point() + 
    geom_smooth(method = "lm", se = TRUE) + geom_abline(slope = 1, intercept = 0)
qdf <- allphewas_both_zipcode %>% select(rliabincome_SE)

Data frames for Braden

braden_allphenotypes_noenvironment <- allphewas_both %>% 
  inner_join(.,allphewas_sibling_correlation %>% select(phewas_code,rliabsibling_SS,rliabsibling_SE_SS,rliabsibling_OS,rliabsibling_SE_OS), by='phewas_code') %>%
  select(phewas_code, phewas_description,shortName,
         h2=h2_liab, h2_SE=h2_liab_SE, h2.zscore=h2_liab.zscore, h2.pvalue=h2_liab.pvalue,
         c2=c2_liab, c2_SE=c2_liab_SE, c2.zscore=c2_liab.zscore, c2.pvalue=c2_liab.pvalue,
         rtwinss=rliabss, rtwinss_SE=rliabss_SE, rtwinss.zscore=rliabss_liab.zscore, rtwinss.pvalue=rliabss_liab.pvalue,
         rtwinos=rliabos, rtwinos_SE=rliabos_SE, rtwinos.zscore=rliabos_liab.zscore, rtwinos.pvalue=rliabos_liab.pvalue, 
         prevalence,
         rsibSS = rliabsibling_SS, rsibSS_SE = rliabsibling_SE_SS,
         rsibOS = rliabsibling_OS, rsibOS_SE = rliabsibling_SE_OS, 
         prevalence, observed_h2, observed_c2, observed_rliabss,
         observed_rliabos) %>%
  mutate(rsibSS.zscore=rsibSS/rsibSS_SE) %>%
  mutate(rsibSS.pvalue=2*pnorm(-abs(rsibSS.zscore))) %>%
  mutate(rsibOS.zscore=rsibOS/rsibOS_SE) %>%
  mutate(rsibOS.pvalue=2*pnorm(-abs(rsibOS.zscore))) %>%
    mutate(twinPrevalenceCategory = as.character(cut(prevalence, breaks=c(0,.10,.30,.80),include.lowest=TRUE,right=FALSE))) 



braden_allphenotypes_environment <- allphewas_both_zipcode %>%
  select(phewas_code, phewas_description, shortName,
         h2=h2_liab, h2_SE=h2_liab_SE, h2.zscore=h2_liab.zscore, h2.pvalue=h2_liab.pvalue,
         c2=c2_liab, c2_SE=c2_liab_SE, c2.zscore=c2_liab.zscore, c2.pvalue=c2_liab.pvalue,
         rtwinss=rliabss, rtwinss_SE=rliabss_SE, rtwinss.zscore=rliabss_liab.zscore, rtwinss.pvalue=rliabss_liab.pvalue,
         rtwinos=rliabos, rtwinos_SE=rliabos_SE, rtwinos.zscore=rliabos_liab.zscore, rtwinos.pvalue=rliabos_liab.pvalue,
         prevalence,
         var_ses=rliabincome, var_ses_SE=rliabincome_SE, var_ses.zscore=rliabincome.zscore, var_ses.pvalue=rliabincome.pvalue,
         var_aqi=rliabaqi, var_aqi_SE=rliabaqi_SE, var_aqi.zscore=rliabaqi.zscore, var_aqi.pvalue=rliabaqi.pvalue,
         var_temp=rliabtemp, var_temp_SE=rliabtemp_SE, var_temp.zscore=rliabtemp.zscore, var_temp.pvalue=rliabtemp.pvalue, observed_h2, observed_c2, observed_rliabss, observed_rliabos,
         observed_rliabincome, observed_rliabaqi, observed_rliabtemp) %>%
    mutate(twinPrevalenceCategory = as.character(cut(prevalence, breaks=c(0,.10,.30,.80),include.lowest=TRUE,right=FALSE))) 




saveRDS(braden_allphenotypes_noenvironment, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/shiny_data/updated_data/allphenotypes_noenvironment.rds')


saveRDS(braden_allphenotypes_environment, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/shiny_data/updated_data/allphenotypes_environment.rds')


saveRDS(braden_df_meta_analytic_estimates_environment, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/shiny_data/updated_data/meta_analytic_estimates_environment.rds')

saveRDS(braden_df_meta_analytic_estimates_noenvironment, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/shiny_data/updated_data/meta_analytic_estimates_noenvironment.rds')




metaAll_justMatch_both <- metaAll_justMatch_h2 %>% full_join(.,metaAll_justMatch_c2, by=c('functional_domain','category'))

saveRDS(metaAll_justMatch_both, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/shiny_data/updated_data/insurance_matchComparison.rds')


phewas_code_functional_domain <- allphewas_both_functional_domain %>% select(phewas_code, functional_domain) 

saveRDS(phewas_code_functional_domain, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/shiny_data/updated_data/phewas_code_functional_domain.rds')


#z1z1 <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/shiny_data/allphenotypes_noenvironment.rds')


#z2z2 <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/shiny_data/allphenotypes_environment.rds')